/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2021 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
35 #include "stringpool.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
46 #include "fold-const.h"
48 #include "stor-layout.h"
50 #include "print-tree.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
58 #include "sched-int.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-walk.h"
64 #include "tree-vectorizer.h"
65 #include "tree-ssa-propagate.h"
67 #include "tm-constrs.h"
68 #include "target-globals.h"
70 #include "tree-vector-builder.h"
72 #include "tree-pass.h"
75 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
77 #include "case-cfn-macros.h"
79 #include "rs6000-internal.h"
82 /* This file should be included last. */
83 #include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS	0
#endif
104 /* Support targetm.vectorize.builtin_mask_for_load. */
105 tree altivec_builtin_mask_for_load
;
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno
= 0;
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode
;
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128
= false;
129 /* Generate the manged name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128). */
131 static bool ieee128_mangling_gcc_8_1
;
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size
;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
166 static int dbg_cost_ctrl
;
168 /* Built in types. */
169 tree rs6000_builtin_types
[RS6000_BTI_MAX
];
170 tree rs6000_builtin_decls
[RS6000_BUILTIN_COUNT
];
172 /* Flag to say the TOC is initialized */
173 int toc_initialized
, need_toc_init
;
174 char toc_label_name
[10];
176 /* Cached value of rs6000_variable_issue. This is cached in
177 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
178 static short cached_can_issue_more
;
180 static GTY(()) section
*read_only_data_section
;
181 static GTY(()) section
*private_data_section
;
182 static GTY(()) section
*tls_data_section
;
183 static GTY(()) section
*tls_private_data_section
;
184 static GTY(()) section
*read_only_private_data_section
;
185 static GTY(()) section
*sdata2_section
;
187 section
*toc_section
= 0;
189 /* Describe the vector unit used for modes. */
190 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
191 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
193 /* Register classes for various constraints that are based on the target
195 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
197 /* Describe the alignment of a vector. */
198 int rs6000_vector_align
[NUM_MACHINE_MODES
];
200 /* Map selected modes to types for builtins. */
201 tree builtin_mode_to_type
[MAX_MACHINE_MODE
][2];
203 /* What modes to automatically generate reciprocal divide estimate (fre) and
204 reciprocal sqrt (frsqrte) for. */
205 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
233 /* -mrecip options. */
236 const char *string
; /* option name */
237 unsigned int mask
; /* mask bits to set */
238 } recip_options
[] = {
239 { "all", RECIP_ALL
},
240 { "none", RECIP_NONE
},
241 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
243 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
244 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
245 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
246 | RECIP_V2DF_RSQRT
) },
247 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
248 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
265 /* Map compiler ISA bits into HWCAP names. */
267 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
268 const char *name
; /* name to use in __builtin_cpu_supports. */
271 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.0 (power9). */
277 { OPTION_MASK_POWER10
, "arch_3_1" }, /* ISA 3.1 (power10). */
281 /* Newer LIBCs explicitly export this symbol to declare that they provide
282 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
283 reference to this symbol whenever we expand a CPU builtin, so that
284 we never link against an old LIBC. */
285 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
287 /* True if we have expanded a CPU builtin. */
288 bool cpu_builtin_p
= false;
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
295 /* Simplfy register classes into simpler classifications. We assume
296 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
297 check for standard register classes (gpr/floating/altivec/vsx) and
298 floating/vector classes (float/altivec/vsx). */
300 enum rs6000_reg_type
{
311 /* Map register class to register type. */
312 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
314 /* First/last register type for the 'normal' register types (i.e. general
315 purpose, floating point, altivec, and VSX registers). */
316 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
318 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
339 /* Map reload register type to a register in the register class. */
340 struct reload_reg_map_type
{
341 const char *name
; /* Register class name. */
342 int reg
; /* Register in the register class. */
345 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
346 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
347 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
348 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
349 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register..  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
366 /* Register type masks based on the type, of valid addressing modes. */
367 struct rs6000_reg_addr
{
368 enum insn_code reload_load
; /* INSN to reload for loading. */
369 enum insn_code reload_store
; /* INSN to reload for storing. */
370 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
371 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
372 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
373 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
374 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
377 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
379 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
381 mode_supports_pre_incdec_p (machine_mode mode
)
383 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
387 /* Helper function to say whether a mode supports PRE_MODIFY. */
389 mode_supports_pre_modify_p (machine_mode mode
)
391 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
395 /* Return true if we have D-form addressing in altivec registers. */
397 mode_supports_vmx_dform (machine_mode mode
)
399 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
402 /* Return true if we have D-form addressing in VSX registers. This addressing
403 is more limited than normal d-form addressing in that the offset must be
404 aligned on a 16-byte boundary. */
406 mode_supports_dq_form (machine_mode mode
)
408 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
412 /* Given that there exists at least one variable that is set (produced)
413 by OUT_INSN and read (consumed) by IN_INSN, return true iff
414 IN_INSN represents one or more memory store operations and none of
415 the variables set by OUT_INSN is used by IN_INSN as the address of a
416 store operation. If either IN_INSN or OUT_INSN does not represent
417 a "single" RTL SET expression (as loosely defined by the
418 implementation of the single_set function) or a PARALLEL with only
419 SETs, CLOBBERs, and USEs inside, this function returns false.
421 This rs6000-specific version of store_data_bypass_p checks for
422 certain conditions that result in assertion failures (and internal
423 compiler errors) in the generic store_data_bypass_p function and
424 returns false rather than calling store_data_bypass_p if one of the
425 problematic conditions is detected. */
428 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
435 in_set
= single_set (in_insn
);
438 if (MEM_P (SET_DEST (in_set
)))
440 out_set
= single_set (out_insn
);
443 out_pat
= PATTERN (out_insn
);
444 if (GET_CODE (out_pat
) == PARALLEL
)
446 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
448 out_exp
= XVECEXP (out_pat
, 0, i
);
449 if ((GET_CODE (out_exp
) == CLOBBER
)
450 || (GET_CODE (out_exp
) == USE
))
452 else if (GET_CODE (out_exp
) != SET
)
461 in_pat
= PATTERN (in_insn
);
462 if (GET_CODE (in_pat
) != PARALLEL
)
465 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
467 in_exp
= XVECEXP (in_pat
, 0, i
);
468 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
470 else if (GET_CODE (in_exp
) != SET
)
473 if (MEM_P (SET_DEST (in_exp
)))
475 out_set
= single_set (out_insn
);
478 out_pat
= PATTERN (out_insn
);
479 if (GET_CODE (out_pat
) != PARALLEL
)
481 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
483 out_exp
= XVECEXP (out_pat
, 0, j
);
484 if ((GET_CODE (out_exp
) == CLOBBER
)
485 || (GET_CODE (out_exp
) == USE
))
487 else if (GET_CODE (out_exp
) != SET
)
494 return store_data_bypass_p (out_insn
, in_insn
);
/* Processor costs (relative to an add) */
const struct processor_costs *rs6000_cost;
502 /* Instruction size costs on 32bit processors. */
504 struct processor_costs size32_cost
= {
505 COSTS_N_INSNS (1), /* mulsi */
506 COSTS_N_INSNS (1), /* mulsi_const */
507 COSTS_N_INSNS (1), /* mulsi_const9 */
508 COSTS_N_INSNS (1), /* muldi */
509 COSTS_N_INSNS (1), /* divsi */
510 COSTS_N_INSNS (1), /* divdi */
511 COSTS_N_INSNS (1), /* fp */
512 COSTS_N_INSNS (1), /* dmul */
513 COSTS_N_INSNS (1), /* sdiv */
514 COSTS_N_INSNS (1), /* ddiv */
515 32, /* cache line size */
519 0, /* SF->DF convert */
522 /* Instruction size costs on 64bit processors. */
524 struct processor_costs size64_cost
= {
525 COSTS_N_INSNS (1), /* mulsi */
526 COSTS_N_INSNS (1), /* mulsi_const */
527 COSTS_N_INSNS (1), /* mulsi_const9 */
528 COSTS_N_INSNS (1), /* muldi */
529 COSTS_N_INSNS (1), /* divsi */
530 COSTS_N_INSNS (1), /* divdi */
531 COSTS_N_INSNS (1), /* fp */
532 COSTS_N_INSNS (1), /* dmul */
533 COSTS_N_INSNS (1), /* sdiv */
534 COSTS_N_INSNS (1), /* ddiv */
535 128, /* cache line size */
539 0, /* SF->DF convert */
542 /* Instruction costs on RS64A processors. */
544 struct processor_costs rs64a_cost
= {
545 COSTS_N_INSNS (20), /* mulsi */
546 COSTS_N_INSNS (12), /* mulsi_const */
547 COSTS_N_INSNS (8), /* mulsi_const9 */
548 COSTS_N_INSNS (34), /* muldi */
549 COSTS_N_INSNS (65), /* divsi */
550 COSTS_N_INSNS (67), /* divdi */
551 COSTS_N_INSNS (4), /* fp */
552 COSTS_N_INSNS (4), /* dmul */
553 COSTS_N_INSNS (31), /* sdiv */
554 COSTS_N_INSNS (31), /* ddiv */
555 128, /* cache line size */
559 0, /* SF->DF convert */
562 /* Instruction costs on MPCCORE processors. */
564 struct processor_costs mpccore_cost
= {
565 COSTS_N_INSNS (2), /* mulsi */
566 COSTS_N_INSNS (2), /* mulsi_const */
567 COSTS_N_INSNS (2), /* mulsi_const9 */
568 COSTS_N_INSNS (2), /* muldi */
569 COSTS_N_INSNS (6), /* divsi */
570 COSTS_N_INSNS (6), /* divdi */
571 COSTS_N_INSNS (4), /* fp */
572 COSTS_N_INSNS (5), /* dmul */
573 COSTS_N_INSNS (10), /* sdiv */
574 COSTS_N_INSNS (17), /* ddiv */
575 32, /* cache line size */
579 0, /* SF->DF convert */
582 /* Instruction costs on PPC403 processors. */
584 struct processor_costs ppc403_cost
= {
585 COSTS_N_INSNS (4), /* mulsi */
586 COSTS_N_INSNS (4), /* mulsi_const */
587 COSTS_N_INSNS (4), /* mulsi_const9 */
588 COSTS_N_INSNS (4), /* muldi */
589 COSTS_N_INSNS (33), /* divsi */
590 COSTS_N_INSNS (33), /* divdi */
591 COSTS_N_INSNS (11), /* fp */
592 COSTS_N_INSNS (11), /* dmul */
593 COSTS_N_INSNS (11), /* sdiv */
594 COSTS_N_INSNS (11), /* ddiv */
595 32, /* cache line size */
599 0, /* SF->DF convert */
602 /* Instruction costs on PPC405 processors. */
604 struct processor_costs ppc405_cost
= {
605 COSTS_N_INSNS (5), /* mulsi */
606 COSTS_N_INSNS (4), /* mulsi_const */
607 COSTS_N_INSNS (3), /* mulsi_const9 */
608 COSTS_N_INSNS (5), /* muldi */
609 COSTS_N_INSNS (35), /* divsi */
610 COSTS_N_INSNS (35), /* divdi */
611 COSTS_N_INSNS (11), /* fp */
612 COSTS_N_INSNS (11), /* dmul */
613 COSTS_N_INSNS (11), /* sdiv */
614 COSTS_N_INSNS (11), /* ddiv */
615 32, /* cache line size */
619 0, /* SF->DF convert */
622 /* Instruction costs on PPC440 processors. */
624 struct processor_costs ppc440_cost
= {
625 COSTS_N_INSNS (3), /* mulsi */
626 COSTS_N_INSNS (2), /* mulsi_const */
627 COSTS_N_INSNS (2), /* mulsi_const9 */
628 COSTS_N_INSNS (3), /* muldi */
629 COSTS_N_INSNS (34), /* divsi */
630 COSTS_N_INSNS (34), /* divdi */
631 COSTS_N_INSNS (5), /* fp */
632 COSTS_N_INSNS (5), /* dmul */
633 COSTS_N_INSNS (19), /* sdiv */
634 COSTS_N_INSNS (33), /* ddiv */
635 32, /* cache line size */
639 0, /* SF->DF convert */
642 /* Instruction costs on PPC476 processors. */
644 struct processor_costs ppc476_cost
= {
645 COSTS_N_INSNS (4), /* mulsi */
646 COSTS_N_INSNS (4), /* mulsi_const */
647 COSTS_N_INSNS (4), /* mulsi_const9 */
648 COSTS_N_INSNS (4), /* muldi */
649 COSTS_N_INSNS (11), /* divsi */
650 COSTS_N_INSNS (11), /* divdi */
651 COSTS_N_INSNS (6), /* fp */
652 COSTS_N_INSNS (6), /* dmul */
653 COSTS_N_INSNS (19), /* sdiv */
654 COSTS_N_INSNS (33), /* ddiv */
655 32, /* l1 cache line size */
659 0, /* SF->DF convert */
662 /* Instruction costs on PPC601 processors. */
664 struct processor_costs ppc601_cost
= {
665 COSTS_N_INSNS (5), /* mulsi */
666 COSTS_N_INSNS (5), /* mulsi_const */
667 COSTS_N_INSNS (5), /* mulsi_const9 */
668 COSTS_N_INSNS (5), /* muldi */
669 COSTS_N_INSNS (36), /* divsi */
670 COSTS_N_INSNS (36), /* divdi */
671 COSTS_N_INSNS (4), /* fp */
672 COSTS_N_INSNS (5), /* dmul */
673 COSTS_N_INSNS (17), /* sdiv */
674 COSTS_N_INSNS (31), /* ddiv */
675 32, /* cache line size */
679 0, /* SF->DF convert */
682 /* Instruction costs on PPC603 processors. */
684 struct processor_costs ppc603_cost
= {
685 COSTS_N_INSNS (5), /* mulsi */
686 COSTS_N_INSNS (3), /* mulsi_const */
687 COSTS_N_INSNS (2), /* mulsi_const9 */
688 COSTS_N_INSNS (5), /* muldi */
689 COSTS_N_INSNS (37), /* divsi */
690 COSTS_N_INSNS (37), /* divdi */
691 COSTS_N_INSNS (3), /* fp */
692 COSTS_N_INSNS (4), /* dmul */
693 COSTS_N_INSNS (18), /* sdiv */
694 COSTS_N_INSNS (33), /* ddiv */
695 32, /* cache line size */
699 0, /* SF->DF convert */
702 /* Instruction costs on PPC604 processors. */
704 struct processor_costs ppc604_cost
= {
705 COSTS_N_INSNS (4), /* mulsi */
706 COSTS_N_INSNS (4), /* mulsi_const */
707 COSTS_N_INSNS (4), /* mulsi_const9 */
708 COSTS_N_INSNS (4), /* muldi */
709 COSTS_N_INSNS (20), /* divsi */
710 COSTS_N_INSNS (20), /* divdi */
711 COSTS_N_INSNS (3), /* fp */
712 COSTS_N_INSNS (3), /* dmul */
713 COSTS_N_INSNS (18), /* sdiv */
714 COSTS_N_INSNS (32), /* ddiv */
715 32, /* cache line size */
719 0, /* SF->DF convert */
722 /* Instruction costs on PPC604e processors. */
724 struct processor_costs ppc604e_cost
= {
725 COSTS_N_INSNS (2), /* mulsi */
726 COSTS_N_INSNS (2), /* mulsi_const */
727 COSTS_N_INSNS (2), /* mulsi_const9 */
728 COSTS_N_INSNS (2), /* muldi */
729 COSTS_N_INSNS (20), /* divsi */
730 COSTS_N_INSNS (20), /* divdi */
731 COSTS_N_INSNS (3), /* fp */
732 COSTS_N_INSNS (3), /* dmul */
733 COSTS_N_INSNS (18), /* sdiv */
734 COSTS_N_INSNS (32), /* ddiv */
735 32, /* cache line size */
739 0, /* SF->DF convert */
742 /* Instruction costs on PPC620 processors. */
744 struct processor_costs ppc620_cost
= {
745 COSTS_N_INSNS (5), /* mulsi */
746 COSTS_N_INSNS (4), /* mulsi_const */
747 COSTS_N_INSNS (3), /* mulsi_const9 */
748 COSTS_N_INSNS (7), /* muldi */
749 COSTS_N_INSNS (21), /* divsi */
750 COSTS_N_INSNS (37), /* divdi */
751 COSTS_N_INSNS (3), /* fp */
752 COSTS_N_INSNS (3), /* dmul */
753 COSTS_N_INSNS (18), /* sdiv */
754 COSTS_N_INSNS (32), /* ddiv */
755 128, /* cache line size */
759 0, /* SF->DF convert */
762 /* Instruction costs on PPC630 processors. */
764 struct processor_costs ppc630_cost
= {
765 COSTS_N_INSNS (5), /* mulsi */
766 COSTS_N_INSNS (4), /* mulsi_const */
767 COSTS_N_INSNS (3), /* mulsi_const9 */
768 COSTS_N_INSNS (7), /* muldi */
769 COSTS_N_INSNS (21), /* divsi */
770 COSTS_N_INSNS (37), /* divdi */
771 COSTS_N_INSNS (3), /* fp */
772 COSTS_N_INSNS (3), /* dmul */
773 COSTS_N_INSNS (17), /* sdiv */
774 COSTS_N_INSNS (21), /* ddiv */
775 128, /* cache line size */
779 0, /* SF->DF convert */
782 /* Instruction costs on Cell processor. */
783 /* COSTS_N_INSNS (1) ~ one add. */
785 struct processor_costs ppccell_cost
= {
786 COSTS_N_INSNS (9/2)+2, /* mulsi */
787 COSTS_N_INSNS (6/2), /* mulsi_const */
788 COSTS_N_INSNS (6/2), /* mulsi_const9 */
789 COSTS_N_INSNS (15/2)+2, /* muldi */
790 COSTS_N_INSNS (38/2), /* divsi */
791 COSTS_N_INSNS (70/2), /* divdi */
792 COSTS_N_INSNS (10/2), /* fp */
793 COSTS_N_INSNS (10/2), /* dmul */
794 COSTS_N_INSNS (74/2), /* sdiv */
795 COSTS_N_INSNS (74/2), /* ddiv */
796 128, /* cache line size */
800 0, /* SF->DF convert */
803 /* Instruction costs on PPC750 and PPC7400 processors. */
805 struct processor_costs ppc750_cost
= {
806 COSTS_N_INSNS (5), /* mulsi */
807 COSTS_N_INSNS (3), /* mulsi_const */
808 COSTS_N_INSNS (2), /* mulsi_const9 */
809 COSTS_N_INSNS (5), /* muldi */
810 COSTS_N_INSNS (17), /* divsi */
811 COSTS_N_INSNS (17), /* divdi */
812 COSTS_N_INSNS (3), /* fp */
813 COSTS_N_INSNS (3), /* dmul */
814 COSTS_N_INSNS (17), /* sdiv */
815 COSTS_N_INSNS (31), /* ddiv */
816 32, /* cache line size */
820 0, /* SF->DF convert */
823 /* Instruction costs on PPC7450 processors. */
825 struct processor_costs ppc7450_cost
= {
826 COSTS_N_INSNS (4), /* mulsi */
827 COSTS_N_INSNS (3), /* mulsi_const */
828 COSTS_N_INSNS (3), /* mulsi_const9 */
829 COSTS_N_INSNS (4), /* muldi */
830 COSTS_N_INSNS (23), /* divsi */
831 COSTS_N_INSNS (23), /* divdi */
832 COSTS_N_INSNS (5), /* fp */
833 COSTS_N_INSNS (5), /* dmul */
834 COSTS_N_INSNS (21), /* sdiv */
835 COSTS_N_INSNS (35), /* ddiv */
836 32, /* cache line size */
840 0, /* SF->DF convert */
843 /* Instruction costs on PPC8540 processors. */
845 struct processor_costs ppc8540_cost
= {
846 COSTS_N_INSNS (4), /* mulsi */
847 COSTS_N_INSNS (4), /* mulsi_const */
848 COSTS_N_INSNS (4), /* mulsi_const9 */
849 COSTS_N_INSNS (4), /* muldi */
850 COSTS_N_INSNS (19), /* divsi */
851 COSTS_N_INSNS (19), /* divdi */
852 COSTS_N_INSNS (4), /* fp */
853 COSTS_N_INSNS (4), /* dmul */
854 COSTS_N_INSNS (29), /* sdiv */
855 COSTS_N_INSNS (29), /* ddiv */
856 32, /* cache line size */
859 1, /* prefetch streams /*/
860 0, /* SF->DF convert */
863 /* Instruction costs on E300C2 and E300C3 cores. */
865 struct processor_costs ppce300c2c3_cost
= {
866 COSTS_N_INSNS (4), /* mulsi */
867 COSTS_N_INSNS (4), /* mulsi_const */
868 COSTS_N_INSNS (4), /* mulsi_const9 */
869 COSTS_N_INSNS (4), /* muldi */
870 COSTS_N_INSNS (19), /* divsi */
871 COSTS_N_INSNS (19), /* divdi */
872 COSTS_N_INSNS (3), /* fp */
873 COSTS_N_INSNS (4), /* dmul */
874 COSTS_N_INSNS (18), /* sdiv */
875 COSTS_N_INSNS (33), /* ddiv */
879 1, /* prefetch streams /*/
880 0, /* SF->DF convert */
883 /* Instruction costs on PPCE500MC processors. */
885 struct processor_costs ppce500mc_cost
= {
886 COSTS_N_INSNS (4), /* mulsi */
887 COSTS_N_INSNS (4), /* mulsi_const */
888 COSTS_N_INSNS (4), /* mulsi_const9 */
889 COSTS_N_INSNS (4), /* muldi */
890 COSTS_N_INSNS (14), /* divsi */
891 COSTS_N_INSNS (14), /* divdi */
892 COSTS_N_INSNS (8), /* fp */
893 COSTS_N_INSNS (10), /* dmul */
894 COSTS_N_INSNS (36), /* sdiv */
895 COSTS_N_INSNS (66), /* ddiv */
896 64, /* cache line size */
899 1, /* prefetch streams /*/
900 0, /* SF->DF convert */
903 /* Instruction costs on PPCE500MC64 processors. */
905 struct processor_costs ppce500mc64_cost
= {
906 COSTS_N_INSNS (4), /* mulsi */
907 COSTS_N_INSNS (4), /* mulsi_const */
908 COSTS_N_INSNS (4), /* mulsi_const9 */
909 COSTS_N_INSNS (4), /* muldi */
910 COSTS_N_INSNS (14), /* divsi */
911 COSTS_N_INSNS (14), /* divdi */
912 COSTS_N_INSNS (4), /* fp */
913 COSTS_N_INSNS (10), /* dmul */
914 COSTS_N_INSNS (36), /* sdiv */
915 COSTS_N_INSNS (66), /* ddiv */
916 64, /* cache line size */
919 1, /* prefetch streams /*/
920 0, /* SF->DF convert */
923 /* Instruction costs on PPCE5500 processors. */
925 struct processor_costs ppce5500_cost
= {
926 COSTS_N_INSNS (5), /* mulsi */
927 COSTS_N_INSNS (5), /* mulsi_const */
928 COSTS_N_INSNS (4), /* mulsi_const9 */
929 COSTS_N_INSNS (5), /* muldi */
930 COSTS_N_INSNS (14), /* divsi */
931 COSTS_N_INSNS (14), /* divdi */
932 COSTS_N_INSNS (7), /* fp */
933 COSTS_N_INSNS (10), /* dmul */
934 COSTS_N_INSNS (36), /* sdiv */
935 COSTS_N_INSNS (66), /* ddiv */
936 64, /* cache line size */
939 1, /* prefetch streams /*/
940 0, /* SF->DF convert */
943 /* Instruction costs on PPCE6500 processors. */
945 struct processor_costs ppce6500_cost
= {
946 COSTS_N_INSNS (5), /* mulsi */
947 COSTS_N_INSNS (5), /* mulsi_const */
948 COSTS_N_INSNS (4), /* mulsi_const9 */
949 COSTS_N_INSNS (5), /* muldi */
950 COSTS_N_INSNS (14), /* divsi */
951 COSTS_N_INSNS (14), /* divdi */
952 COSTS_N_INSNS (7), /* fp */
953 COSTS_N_INSNS (10), /* dmul */
954 COSTS_N_INSNS (36), /* sdiv */
955 COSTS_N_INSNS (66), /* ddiv */
956 64, /* cache line size */
959 1, /* prefetch streams /*/
960 0, /* SF->DF convert */
963 /* Instruction costs on AppliedMicro Titan processors. */
965 struct processor_costs titan_cost
= {
966 COSTS_N_INSNS (5), /* mulsi */
967 COSTS_N_INSNS (5), /* mulsi_const */
968 COSTS_N_INSNS (5), /* mulsi_const9 */
969 COSTS_N_INSNS (5), /* muldi */
970 COSTS_N_INSNS (18), /* divsi */
971 COSTS_N_INSNS (18), /* divdi */
972 COSTS_N_INSNS (10), /* fp */
973 COSTS_N_INSNS (10), /* dmul */
974 COSTS_N_INSNS (46), /* sdiv */
975 COSTS_N_INSNS (72), /* ddiv */
976 32, /* cache line size */
979 1, /* prefetch streams /*/
980 0, /* SF->DF convert */
983 /* Instruction costs on POWER4 and POWER5 processors. */
985 struct processor_costs power4_cost
= {
986 COSTS_N_INSNS (3), /* mulsi */
987 COSTS_N_INSNS (2), /* mulsi_const */
988 COSTS_N_INSNS (2), /* mulsi_const9 */
989 COSTS_N_INSNS (4), /* muldi */
990 COSTS_N_INSNS (18), /* divsi */
991 COSTS_N_INSNS (34), /* divdi */
992 COSTS_N_INSNS (3), /* fp */
993 COSTS_N_INSNS (3), /* dmul */
994 COSTS_N_INSNS (17), /* sdiv */
995 COSTS_N_INSNS (17), /* ddiv */
996 128, /* cache line size */
999 8, /* prefetch streams /*/
1000 0, /* SF->DF convert */
1003 /* Instruction costs on POWER6 processors. */
1005 struct processor_costs power6_cost
= {
1006 COSTS_N_INSNS (8), /* mulsi */
1007 COSTS_N_INSNS (8), /* mulsi_const */
1008 COSTS_N_INSNS (8), /* mulsi_const9 */
1009 COSTS_N_INSNS (8), /* muldi */
1010 COSTS_N_INSNS (22), /* divsi */
1011 COSTS_N_INSNS (28), /* divdi */
1012 COSTS_N_INSNS (3), /* fp */
1013 COSTS_N_INSNS (3), /* dmul */
1014 COSTS_N_INSNS (13), /* sdiv */
1015 COSTS_N_INSNS (16), /* ddiv */
1016 128, /* cache line size */
1018 2048, /* l2 cache */
1019 16, /* prefetch streams */
1020 0, /* SF->DF convert */
1023 /* Instruction costs on POWER7 processors. */
1025 struct processor_costs power7_cost
= {
1026 COSTS_N_INSNS (2), /* mulsi */
1027 COSTS_N_INSNS (2), /* mulsi_const */
1028 COSTS_N_INSNS (2), /* mulsi_const9 */
1029 COSTS_N_INSNS (2), /* muldi */
1030 COSTS_N_INSNS (18), /* divsi */
1031 COSTS_N_INSNS (34), /* divdi */
1032 COSTS_N_INSNS (3), /* fp */
1033 COSTS_N_INSNS (3), /* dmul */
1034 COSTS_N_INSNS (13), /* sdiv */
1035 COSTS_N_INSNS (16), /* ddiv */
1036 128, /* cache line size */
1039 12, /* prefetch streams */
1040 COSTS_N_INSNS (3), /* SF->DF convert */
1043 /* Instruction costs on POWER8 processors. */
1045 struct processor_costs power8_cost
= {
1046 COSTS_N_INSNS (3), /* mulsi */
1047 COSTS_N_INSNS (3), /* mulsi_const */
1048 COSTS_N_INSNS (3), /* mulsi_const9 */
1049 COSTS_N_INSNS (3), /* muldi */
1050 COSTS_N_INSNS (19), /* divsi */
1051 COSTS_N_INSNS (35), /* divdi */
1052 COSTS_N_INSNS (3), /* fp */
1053 COSTS_N_INSNS (3), /* dmul */
1054 COSTS_N_INSNS (14), /* sdiv */
1055 COSTS_N_INSNS (17), /* ddiv */
1056 128, /* cache line size */
1059 12, /* prefetch streams */
1060 COSTS_N_INSNS (3), /* SF->DF convert */
1063 /* Instruction costs on POWER9 processors. */
1065 struct processor_costs power9_cost
= {
1066 COSTS_N_INSNS (3), /* mulsi */
1067 COSTS_N_INSNS (3), /* mulsi_const */
1068 COSTS_N_INSNS (3), /* mulsi_const9 */
1069 COSTS_N_INSNS (3), /* muldi */
1070 COSTS_N_INSNS (8), /* divsi */
1071 COSTS_N_INSNS (12), /* divdi */
1072 COSTS_N_INSNS (3), /* fp */
1073 COSTS_N_INSNS (3), /* dmul */
1074 COSTS_N_INSNS (13), /* sdiv */
1075 COSTS_N_INSNS (18), /* ddiv */
1076 128, /* cache line size */
1079 8, /* prefetch streams */
1080 COSTS_N_INSNS (3), /* SF->DF convert */
1083 /* Instruction costs on POWER A2 processors. */
1085 struct processor_costs ppca2_cost
= {
1086 COSTS_N_INSNS (16), /* mulsi */
1087 COSTS_N_INSNS (16), /* mulsi_const */
1088 COSTS_N_INSNS (16), /* mulsi_const9 */
1089 COSTS_N_INSNS (16), /* muldi */
1090 COSTS_N_INSNS (22), /* divsi */
1091 COSTS_N_INSNS (28), /* divdi */
1092 COSTS_N_INSNS (3), /* fp */
1093 COSTS_N_INSNS (3), /* dmul */
1094 COSTS_N_INSNS (59), /* sdiv */
1095 COSTS_N_INSNS (72), /* ddiv */
1098 2048, /* l2 cache */
1099 16, /* prefetch streams */
1100 0, /* SF->DF convert */
1103 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1104 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1107 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1108 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1109 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1110 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1111 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1112 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1113 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1114 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1115 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1117 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1119 static bool is_microcoded_insn (rtx_insn
*);
1120 static bool is_nonpipeline_insn (rtx_insn
*);
1121 static bool is_cracked_insn (rtx_insn
*);
1122 static bool is_load_insn (rtx
, rtx
*);
1123 static bool is_store_insn (rtx
, rtx
*);
1124 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1125 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1126 static bool insn_must_be_first_in_group (rtx_insn
*);
1127 static bool insn_must_be_last_in_group (rtx_insn
*);
1128 int easy_vector_constant (rtx
, machine_mode
);
1129 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1130 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1132 static tree
get_prev_label (tree
);
1134 static bool rs6000_mode_dependent_address (const_rtx
);
1135 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1136 static bool rs6000_offsettable_memref_p (rtx
, machine_mode
, bool);
1137 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1139 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1142 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1143 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1145 static bool rs6000_debug_secondary_memory_needed (machine_mode
,
1148 static bool rs6000_debug_can_change_mode_class (machine_mode
,
1152 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1153 = rs6000_mode_dependent_address
;
1155 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1157 = rs6000_secondary_reload_class
;
1159 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1160 = rs6000_preferred_reload_class
;
1162 const int INSN_NOT_AVAILABLE
= -1;
1164 static void rs6000_print_isa_options (FILE *, int, const char *,
1166 static void rs6000_print_builtin_options (FILE *, int, const char *,
1168 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1170 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1171 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1172 enum rs6000_reg_type
,
1174 secondary_reload_info
*,
1176 static enum non_prefixed_form
reg_to_non_prefixed (rtx reg
, machine_mode mode
);
1177 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1179 /* Hash table stuff for keeping track of TOC entries. */
1181 struct GTY((for_user
)) toc_hash_struct
1183 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1184 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1186 machine_mode key_mode
;
1190 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1192 static hashval_t
hash (toc_hash_struct
*);
1193 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1196 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
/* Default register names.  Indexed in hard-register-number order:
   32 GPRs, 32 FPRs, 32 VRs, then the special registers.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
/* Alternate (symbolic) register names, used when -mregnames selects the
   %rN/%fN/%vN/%crN assembler spellings.  */
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
1253 /* Table of valid machine attributes. */
1255 static const struct attribute_spec rs6000_attribute_table
[] =
1257 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1258 affects_type_identity, handler, exclude } */
1259 { "altivec", 1, 1, false, true, false, false,
1260 rs6000_handle_altivec_attribute
, NULL
},
1261 { "longcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute
, NULL
},
1263 { "shortcall", 0, 0, false, true, true, false,
1264 rs6000_handle_longcall_attribute
, NULL
},
1265 { "ms_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute
, NULL
},
1267 { "gcc_struct", 0, 0, false, false, false, false,
1268 rs6000_handle_struct_attribute
, NULL
},
1269 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1270 SUBTARGET_ATTRIBUTE_TABLE
,
1272 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1275 #ifndef TARGET_PROFILE_KERNEL
1276 #define TARGET_PROFILE_KERNEL 0
1279 /* Initialize the GCC target structure. */
1280 #undef TARGET_ATTRIBUTE_TABLE
1281 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1282 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1283 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1284 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1285 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1287 #undef TARGET_ASM_ALIGNED_DI_OP
1288 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1290 /* Default unaligned ops are only provided for ELF. Find the ops needed
1291 for non-ELF systems. */
1292 #ifndef OBJECT_FORMAT_ELF
1294 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1296 #undef TARGET_ASM_UNALIGNED_HI_OP
1297 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1298 #undef TARGET_ASM_UNALIGNED_SI_OP
1299 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1300 #undef TARGET_ASM_UNALIGNED_DI_OP
1301 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1304 #undef TARGET_ASM_UNALIGNED_HI_OP
1305 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1306 #undef TARGET_ASM_UNALIGNED_SI_OP
1307 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1308 #undef TARGET_ASM_UNALIGNED_DI_OP
1309 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1315 /* This hook deals with fixups for relocatable code and DI-mode objects
1317 #undef TARGET_ASM_INTEGER
1318 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1320 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1321 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1322 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1325 #undef TARGET_SET_UP_BY_PROLOGUE
1326 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1328 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1329 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1330 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1331 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1332 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1336 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1338 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1339 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1341 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1342 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1344 #undef TARGET_INTERNAL_ARG_POINTER
1345 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1347 #undef TARGET_HAVE_TLS
1348 #define TARGET_HAVE_TLS HAVE_AS_TLS
1350 #undef TARGET_CANNOT_FORCE_CONST_MEM
1351 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1353 #undef TARGET_DELEGITIMIZE_ADDRESS
1354 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1356 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1357 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1359 #undef TARGET_LEGITIMATE_COMBINED_INSN
1360 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1362 #undef TARGET_ASM_FUNCTION_PROLOGUE
1363 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1364 #undef TARGET_ASM_FUNCTION_EPILOGUE
1365 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1367 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1368 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1370 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1371 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1373 #undef TARGET_LEGITIMIZE_ADDRESS
1374 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1376 #undef TARGET_SCHED_VARIABLE_ISSUE
1377 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1379 #undef TARGET_SCHED_ISSUE_RATE
1380 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1381 #undef TARGET_SCHED_ADJUST_COST
1382 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1383 #undef TARGET_SCHED_ADJUST_PRIORITY
1384 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1385 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1386 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1387 #undef TARGET_SCHED_INIT
1388 #define TARGET_SCHED_INIT rs6000_sched_init
1389 #undef TARGET_SCHED_FINISH
1390 #define TARGET_SCHED_FINISH rs6000_sched_finish
1391 #undef TARGET_SCHED_REORDER
1392 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1393 #undef TARGET_SCHED_REORDER2
1394 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1396 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1397 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1399 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1400 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1402 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1403 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1404 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1405 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1406 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1407 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1408 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1409 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1411 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1412 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1414 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1415 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1416 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1417 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1418 rs6000_builtin_support_vector_misalignment
1419 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1420 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1421 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1422 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1423 rs6000_builtin_vectorization_cost
1424 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1425 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1426 rs6000_preferred_simd_mode
1427 #undef TARGET_VECTORIZE_INIT_COST
1428 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1429 #undef TARGET_VECTORIZE_ADD_STMT_COST
1430 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1431 #undef TARGET_VECTORIZE_FINISH_COST
1432 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1433 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1434 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1436 #undef TARGET_LOOP_UNROLL_ADJUST
1437 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1439 #undef TARGET_INIT_BUILTINS
1440 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1441 #undef TARGET_BUILTIN_DECL
1442 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1444 #undef TARGET_FOLD_BUILTIN
1445 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1446 #undef TARGET_GIMPLE_FOLD_BUILTIN
1447 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1449 #undef TARGET_EXPAND_BUILTIN
1450 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1452 #undef TARGET_MANGLE_TYPE
1453 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1455 #undef TARGET_INIT_LIBFUNCS
1456 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1459 #undef TARGET_BINDS_LOCAL_P
1460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1463 #undef TARGET_MS_BITFIELD_LAYOUT_P
1464 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1466 #undef TARGET_ASM_OUTPUT_MI_THUNK
1467 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1472 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1473 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1475 #undef TARGET_REGISTER_MOVE_COST
1476 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1477 #undef TARGET_MEMORY_MOVE_COST
1478 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1479 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1480 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1481 rs6000_ira_change_pseudo_allocno_class
1482 #undef TARGET_CANNOT_COPY_INSN_P
1483 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1484 #undef TARGET_RTX_COSTS
1485 #define TARGET_RTX_COSTS rs6000_rtx_costs
1486 #undef TARGET_ADDRESS_COST
1487 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1488 #undef TARGET_INSN_COST
1489 #define TARGET_INSN_COST rs6000_insn_cost
1491 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1492 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1494 #undef TARGET_PROMOTE_FUNCTION_MODE
1495 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1497 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1498 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1500 #undef TARGET_RETURN_IN_MEMORY
1501 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1503 #undef TARGET_RETURN_IN_MSB
1504 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1506 #undef TARGET_SETUP_INCOMING_VARARGS
1507 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1509 /* Always strict argument naming on rs6000. */
1510 #undef TARGET_STRICT_ARGUMENT_NAMING
1511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1512 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1513 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1514 #undef TARGET_SPLIT_COMPLEX_ARG
1515 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1516 #undef TARGET_MUST_PASS_IN_STACK
1517 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1518 #undef TARGET_PASS_BY_REFERENCE
1519 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1520 #undef TARGET_ARG_PARTIAL_BYTES
1521 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1522 #undef TARGET_FUNCTION_ARG_ADVANCE
1523 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1524 #undef TARGET_FUNCTION_ARG
1525 #define TARGET_FUNCTION_ARG rs6000_function_arg
1526 #undef TARGET_FUNCTION_ARG_PADDING
1527 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1528 #undef TARGET_FUNCTION_ARG_BOUNDARY
1529 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1531 #undef TARGET_BUILD_BUILTIN_VA_LIST
1532 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1534 #undef TARGET_EXPAND_BUILTIN_VA_START
1535 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1537 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1538 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1540 #undef TARGET_EH_RETURN_FILTER_MODE
1541 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1543 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1544 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1546 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1547 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1549 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1550 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1552 #undef TARGET_FLOATN_MODE
1553 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1555 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1556 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1558 #undef TARGET_MD_ASM_ADJUST
1559 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1561 #undef TARGET_OPTION_OVERRIDE
1562 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1564 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1565 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1566 rs6000_builtin_vectorized_function
1568 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1569 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1570 rs6000_builtin_md_vectorized_function
1572 #undef TARGET_STACK_PROTECT_GUARD
1573 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1576 #undef TARGET_STACK_PROTECT_FAIL
1577 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1581 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1582 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1585 /* Use a 32-bit anchor range. This leads to sequences like:
1587 addis tmp,anchor,high
1590 where tmp itself acts as an anchor, and can be shared between
1591 accesses to the same 64k page. */
1592 #undef TARGET_MIN_ANCHOR_OFFSET
1593 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1594 #undef TARGET_MAX_ANCHOR_OFFSET
1595 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1596 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1597 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1598 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1599 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1601 #undef TARGET_BUILTIN_RECIPROCAL
1602 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1604 #undef TARGET_SECONDARY_RELOAD
1605 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1606 #undef TARGET_SECONDARY_MEMORY_NEEDED
1607 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1608 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1609 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1611 #undef TARGET_LEGITIMATE_ADDRESS_P
1612 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1614 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1615 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1617 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1618 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1620 #undef TARGET_CAN_ELIMINATE
1621 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1623 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1624 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1626 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1627 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1629 #undef TARGET_TRAMPOLINE_INIT
1630 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1632 #undef TARGET_FUNCTION_VALUE
1633 #define TARGET_FUNCTION_VALUE rs6000_function_value
1635 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1636 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1638 #undef TARGET_OPTION_SAVE
1639 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1641 #undef TARGET_OPTION_RESTORE
1642 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1644 #undef TARGET_OPTION_PRINT
1645 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1647 #undef TARGET_CAN_INLINE_P
1648 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1650 #undef TARGET_SET_CURRENT_FUNCTION
1651 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1653 #undef TARGET_LEGITIMATE_CONSTANT_P
1654 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1656 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1657 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1659 #undef TARGET_CAN_USE_DOLOOP_P
1660 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1662 #undef TARGET_PREDICT_DOLOOP_P
1663 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1665 #undef TARGET_HAVE_COUNT_REG_DECR_P
1666 #define TARGET_HAVE_COUNT_REG_DECR_P true
1668 /* 1000000000 is infinite cost in IVOPTs. */
1669 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1670 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1672 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1673 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1678 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1679 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1680 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1681 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1682 #undef TARGET_UNWIND_WORD_MODE
1683 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1685 #undef TARGET_OFFLOAD_OPTIONS
1686 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1688 #undef TARGET_C_MODE_FOR_SUFFIX
1689 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1691 #undef TARGET_INVALID_BINARY_OP
1692 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1694 #undef TARGET_OPTAB_SUPPORTED_P
1695 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1697 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1698 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1700 #undef TARGET_COMPARE_VERSION_PRIORITY
1701 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1703 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1704 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1705 rs6000_generate_version_dispatcher_body
1707 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1708 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1709 rs6000_get_function_versions_dispatcher
1711 #undef TARGET_OPTION_FUNCTION_VERSIONS
1712 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1714 #undef TARGET_HARD_REGNO_NREGS
1715 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1716 #undef TARGET_HARD_REGNO_MODE_OK
1717 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1719 #undef TARGET_MODES_TIEABLE_P
1720 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1722 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1723 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1724 rs6000_hard_regno_call_part_clobbered
1726 #undef TARGET_SLOW_UNALIGNED_ACCESS
1727 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1729 #undef TARGET_CAN_CHANGE_MODE_CLASS
1730 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1732 #undef TARGET_CONSTANT_ALIGNMENT
1733 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1735 #undef TARGET_STARTING_FRAME_OFFSET
1736 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1738 #if TARGET_ELF && RS6000_WEAK
1739 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1740 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1743 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1744 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1746 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1747 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1749 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1750 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1751 rs6000_cannot_substitute_mem_equiv_p
1753 #undef TARGET_INVALID_CONVERSION
1754 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1757 /* Processor table. */
1760 const char *const name
; /* Canonical processor name. */
1761 const enum processor_type processor
; /* Processor type enum value. */
1762 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1765 static struct rs6000_ptt
const processor_target_table
[] =
1767 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1768 #include "rs6000-cpus.def"
1772 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1776 rs6000_cpu_name_lookup (const char *name
)
1782 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
1783 if (! strcmp (name
, processor_target_table
[i
].name
))
1791 /* Return number of consecutive hard regs needed starting at reg REGNO
1792 to hold something of mode MODE.
1793 This is ordinarily the length in words of a value of mode MODE
1794 but can be less for certain modes in special long registers.
1796 POWER and PowerPC GPRs hold 32 bits worth;
1797 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1800 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
1802 unsigned HOST_WIDE_INT reg_size
;
1804 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1805 128-bit floating point that can go in vector registers, which has VSX
1806 memory addressing. */
1807 if (FP_REGNO_P (regno
))
1808 reg_size
= (VECTOR_MEM_VSX_P (mode
) || VECTOR_ALIGNMENT_P (mode
)
1809 ? UNITS_PER_VSX_WORD
1810 : UNITS_PER_FP_WORD
);
1812 else if (ALTIVEC_REGNO_P (regno
))
1813 reg_size
= UNITS_PER_ALTIVEC_WORD
;
1816 reg_size
= UNITS_PER_WORD
;
1818 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
1821 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1824 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
1826 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
1828 if (COMPLEX_MODE_P (mode
))
1829 mode
= GET_MODE_INNER (mode
);
1831 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1834 return (TARGET_MMA
&& VSX_REGNO_P (regno
) && (regno
& 1) == 0);
1836 /* MMA accumulator modes need FPR registers divisible by 4. */
1838 return (TARGET_MMA
&& FP_REGNO_P (regno
) && (regno
& 3) == 0);
1840 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1841 register combinations, and use PTImode where we need to deal with quad
1842 word memory operations. Don't allow quad words in the argument or frame
1843 pointer registers, just registers 0..31. */
1844 if (mode
== PTImode
)
1845 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1846 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1847 && ((regno
& 1) == 0));
1849 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1850 implementations. Don't allow an item to be split between a FP register
1851 and an Altivec register. Allow TImode in all VSX registers if the user
1853 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1854 && (VECTOR_MEM_VSX_P (mode
)
1855 || VECTOR_ALIGNMENT_P (mode
)
1856 || reg_addr
[mode
].scalar_in_vmx_p
1858 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
1860 if (FP_REGNO_P (regno
))
1861 return FP_REGNO_P (last_regno
);
1863 if (ALTIVEC_REGNO_P (regno
))
1865 if (GET_MODE_SIZE (mode
) < 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1868 return ALTIVEC_REGNO_P (last_regno
);
1872 /* The GPRs can hold any mode, but values bigger than one register
1873 cannot go past R31. */
1874 if (INT_REGNO_P (regno
))
1875 return INT_REGNO_P (last_regno
);
1877 /* The float registers (except for VSX vector modes) can only hold floating
1878 modes and DImode. */
1879 if (FP_REGNO_P (regno
))
1881 if (VECTOR_ALIGNMENT_P (mode
))
1884 if (SCALAR_FLOAT_MODE_P (mode
)
1885 && (mode
!= TDmode
|| (regno
% 2) == 0)
1886 && FP_REGNO_P (last_regno
))
1889 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1891 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
1894 if (TARGET_P8_VECTOR
&& (mode
== SImode
))
1897 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1904 /* The CR register can only hold CC modes. */
1905 if (CR_REGNO_P (regno
))
1906 return GET_MODE_CLASS (mode
) == MODE_CC
;
1908 if (CA_REGNO_P (regno
))
1909 return mode
== Pmode
|| mode
== SImode
;
1911 /* AltiVec only in AldyVec registers. */
1912 if (ALTIVEC_REGNO_P (regno
))
1913 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
1914 || mode
== V1TImode
);
1916 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1917 and it must be able to fit within the register set. */
1919 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
1922 /* Implement TARGET_HARD_REGNO_NREGS. */
1925 rs6000_hard_regno_nregs_hook (unsigned int regno
, machine_mode mode
)
1927 return rs6000_hard_regno_nregs
[mode
][regno
];
1930 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1933 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
1935 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
1938 /* Implement TARGET_MODES_TIEABLE_P.
1940 PTImode cannot tie with other modes because PTImode is restricted to even
1941 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1944 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1945 registers) or XOmode (vector quad, restricted to FPR registers divisible
1946 by 4) to tie with other modes.
1948 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1949 128-bit floating point on VSX systems ties with other vectors. */
1952 rs6000_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
1954 if (mode1
== PTImode
|| mode1
== OOmode
|| mode1
== XOmode
1955 || mode2
== PTImode
|| mode2
== OOmode
|| mode2
== XOmode
)
1956 return mode1
== mode2
;
1958 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1
))
1959 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2
);
1960 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2
))
1963 if (SCALAR_FLOAT_MODE_P (mode1
))
1964 return SCALAR_FLOAT_MODE_P (mode2
);
1965 if (SCALAR_FLOAT_MODE_P (mode2
))
1968 if (GET_MODE_CLASS (mode1
) == MODE_CC
)
1969 return GET_MODE_CLASS (mode2
) == MODE_CC
;
1970 if (GET_MODE_CLASS (mode2
) == MODE_CC
)
1976 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1979 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
1984 && GET_MODE_SIZE (mode
) > 4
1985 && INT_REGNO_P (regno
))
1989 && FP_REGNO_P (regno
)
1990 && GET_MODE_SIZE (mode
) > 8
1991 && !FLOAT128_2REG_P (mode
))
1997 /* Print interesting facts about registers. */
1999 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2003 for (r
= first_regno
; r
<= last_regno
; ++r
)
2005 const char *comma
= "";
2008 if (first_regno
== last_regno
)
2009 fprintf (stderr
, "%s:\t", reg_name
);
2011 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2014 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2015 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2019 fprintf (stderr
, ",\n\t");
2024 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2025 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2026 rs6000_hard_regno_nregs
[m
][r
]);
2028 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2033 if (call_used_or_fixed_reg_p (r
))
2037 fprintf (stderr
, ",\n\t");
2042 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2050 fprintf (stderr
, ",\n\t");
2055 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2061 fprintf (stderr
, ",\n\t");
2065 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2066 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2071 fprintf (stderr
, ",\n\t");
2075 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2080 rs6000_debug_vector_unit (enum rs6000_vector v
)
2086 case VECTOR_NONE
: ret
= "none"; break;
2087 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2088 case VECTOR_VSX
: ret
= "vsx"; break;
2089 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2090 default: ret
= "unknown"; break;
2096 /* Inner function printing just the address mask for a particular reload
2098 DEBUG_FUNCTION
char *
2099 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2104 if ((mask
& RELOAD_REG_VALID
) != 0)
2106 else if (keep_spaces
)
2109 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2111 else if (keep_spaces
)
2114 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2116 else if (keep_spaces
)
2119 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2121 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2123 else if (keep_spaces
)
2126 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2128 else if (keep_spaces
)
2131 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2133 else if (keep_spaces
)
2136 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2138 else if (keep_spaces
)
2146 /* Print the address masks in a human readable fashion. */
2148 rs6000_debug_print_mode (ssize_t m
)
2153 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2154 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2155 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2156 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2158 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2159 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2161 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2162 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2163 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2167 spaces
+= strlen (" Reload=sl");
2169 if (reg_addr
[m
].scalar_in_vmx_p
)
2171 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2175 spaces
+= strlen (" Upper=y");
2177 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2178 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2180 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2182 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2183 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2186 fputs ("\n", stderr
);
/* printf-style format helpers for the -mdebug=reg dump output: each emits a
   left-justified, 32-character-wide "name = " label followed by a decimal
   int (_D), a zero-padded hex HOST_WIDE_INT with no trailing newline (_WX),
   or a string (_S).  */
2189 #define DEBUG_FMT_ID "%-32s= "
2190 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2191 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2192 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2194 /* Print various interesting information with -mdebug=reg. */
2196 rs6000_debug_reg_global (void)
2198 static const char *const tf
[2] = { "false", "true" };
2199 const char *nl
= (const char *)0;
2202 char costly_num
[20];
2204 char flags_buffer
[40];
2205 const char *costly_str
;
2206 const char *nop_str
;
2207 const char *trace_str
;
2208 const char *abi_str
;
2209 const char *cmodel_str
;
2210 struct cl_target_option cl_opts
;
2212 /* Modes we want tieable information on. */
2213 static const machine_mode print_tieable_modes
[] = {
2252 /* Virtual regs we are interested in. */
2253 const static struct {
2254 int regno
; /* register number. */
2255 const char *name
; /* register name. */
2256 } virtual_regs
[] = {
2257 { STACK_POINTER_REGNUM
, "stack pointer:" },
2258 { TOC_REGNUM
, "toc: " },
2259 { STATIC_CHAIN_REGNUM
, "static chain: " },
2260 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2261 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2262 { ARG_POINTER_REGNUM
, "arg pointer: " },
2263 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2264 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2265 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2266 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2267 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2268 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2269 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2270 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2271 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2272 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2275 fputs ("\nHard register information:\n", stderr
);
2276 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2277 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2278 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2281 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2282 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2283 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2284 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2285 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2286 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2288 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2289 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2290 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2294 "d reg_class = %s\n"
2295 "f reg_class = %s\n"
2296 "v reg_class = %s\n"
2297 "wa reg_class = %s\n"
2298 "we reg_class = %s\n"
2299 "wr reg_class = %s\n"
2300 "wx reg_class = %s\n"
2301 "wA reg_class = %s\n"
2303 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2304 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2305 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2306 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2307 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2308 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2309 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2310 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2313 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2314 rs6000_debug_print_mode (m
);
2316 fputs ("\n", stderr
);
2318 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2320 machine_mode mode1
= print_tieable_modes
[m1
];
2321 bool first_time
= true;
2323 nl
= (const char *)0;
2324 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2326 machine_mode mode2
= print_tieable_modes
[m2
];
2327 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2331 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2336 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2341 fputs ("\n", stderr
);
2347 if (rs6000_recip_control
)
2349 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2351 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2352 if (rs6000_recip_bits
[m
])
2355 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2357 (RS6000_RECIP_AUTO_RE_P (m
)
2359 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2360 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2362 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2365 fputs ("\n", stderr
);
2368 if (rs6000_cpu_index
>= 0)
2370 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2372 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2374 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2375 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2378 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2380 if (rs6000_tune_index
>= 0)
2382 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2384 = processor_target_table
[rs6000_tune_index
].target_enable
;
2386 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2387 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2390 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2392 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2393 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2396 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2397 rs6000_isa_flags_explicit
);
2399 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2400 rs6000_builtin_mask
);
2402 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2404 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2405 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2407 switch (rs6000_sched_costly_dep
)
2409 case max_dep_latency
:
2410 costly_str
= "max_dep_latency";
2414 costly_str
= "no_dep_costly";
2417 case all_deps_costly
:
2418 costly_str
= "all_deps_costly";
2421 case true_store_to_load_dep_costly
:
2422 costly_str
= "true_store_to_load_dep_costly";
2425 case store_to_load_dep_costly
:
2426 costly_str
= "store_to_load_dep_costly";
2430 costly_str
= costly_num
;
2431 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2435 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2437 switch (rs6000_sched_insert_nops
)
2439 case sched_finish_regroup_exact
:
2440 nop_str
= "sched_finish_regroup_exact";
2443 case sched_finish_pad_groups
:
2444 nop_str
= "sched_finish_pad_groups";
2447 case sched_finish_none
:
2448 nop_str
= "sched_finish_none";
2453 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2457 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2459 switch (rs6000_sdata
)
2466 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2470 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2474 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2479 switch (rs6000_traceback
)
2481 case traceback_default
: trace_str
= "default"; break;
2482 case traceback_none
: trace_str
= "none"; break;
2483 case traceback_part
: trace_str
= "part"; break;
2484 case traceback_full
: trace_str
= "full"; break;
2485 default: trace_str
= "unknown"; break;
2488 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2490 switch (rs6000_current_cmodel
)
2492 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2493 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2494 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2495 default: cmodel_str
= "unknown"; break;
2498 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2500 switch (rs6000_current_abi
)
2502 case ABI_NONE
: abi_str
= "none"; break;
2503 case ABI_AIX
: abi_str
= "aix"; break;
2504 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2505 case ABI_V4
: abi_str
= "V4"; break;
2506 case ABI_DARWIN
: abi_str
= "darwin"; break;
2507 default: abi_str
= "unknown"; break;
2510 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2512 if (rs6000_altivec_abi
)
2513 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2515 if (rs6000_darwin64_abi
)
2516 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2518 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2519 (TARGET_SOFT_FLOAT
? "true" : "false"));
2521 if (TARGET_LINK_STACK
)
2522 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2524 if (TARGET_P8_FUSION
)
2528 strcpy (options
, "power8");
2529 if (TARGET_P8_FUSION_SIGN
)
2530 strcat (options
, ", sign");
2532 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2535 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2536 TARGET_SECURE_PLT
? "secure" : "bss");
2537 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2538 aix_struct_return
? "aix" : "sysv");
2539 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2540 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2541 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2542 tf
[!!rs6000_align_branch_targets
]);
2543 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2544 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2545 rs6000_long_double_type_size
);
2546 if (rs6000_long_double_type_size
> 64)
2548 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2549 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2550 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2551 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2553 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2554 (int)rs6000_sched_restricted_insns_priority
);
2555 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2557 fprintf (stderr
, DEBUG_FMT_D
, "Number of rs6000 builtins",
2558 (int)RS6000_BUILTIN_COUNT
);
2560 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2561 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2564 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2565 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2567 if (TARGET_DIRECT_MOVE_128
)
2568 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2569 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2573 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2574 legitimate address support to figure out the appropriate addressing to
2578 rs6000_setup_reg_addr_masks (void)
2580 ssize_t rc
, reg
, m
, nregs
;
2581 addr_mask_type any_addr_mask
, addr_mask
;
2583 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2585 machine_mode m2
= (machine_mode
) m
;
2586 bool complex_p
= false;
2587 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2590 if (COMPLEX_MODE_P (m2
))
2593 m2
= GET_MODE_INNER (m2
);
2596 msize
= GET_MODE_SIZE (m2
);
2598 /* SDmode is special in that we want to access it only via REG+REG
2599 addressing on power7 and above, since we want to use the LFIWZX and
2600 STFIWZX instructions to load it. */
2601 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2604 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2607 reg
= reload_reg_map
[rc
].reg
;
2609 /* Can mode values go in the GPR/FPR/Altivec registers? */
2610 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2612 bool small_int_vsx_p
= (small_int_p
2613 && (rc
== RELOAD_REG_FPR
2614 || rc
== RELOAD_REG_VMX
));
2616 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2617 addr_mask
|= RELOAD_REG_VALID
;
2619 /* Indicate if the mode takes more than 1 physical register. If
2620 it takes a single register, indicate it can do REG+REG
2621 addressing. Small integers in VSX registers can only do
2622 REG+REG addressing. */
2623 if (small_int_vsx_p
)
2624 addr_mask
|= RELOAD_REG_INDEXED
;
2625 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2626 addr_mask
|= RELOAD_REG_MULTIPLE
;
2628 addr_mask
|= RELOAD_REG_INDEXED
;
2630 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2631 addressing. If we allow scalars into Altivec registers,
2632 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2634 For VSX systems, we don't allow update addressing for
2635 DFmode/SFmode if those registers can go in both the
2636 traditional floating point registers and Altivec registers.
2637 The load/store instructions for the Altivec registers do not
2638 have update forms. If we allowed update addressing, it seems
2639 to break IV-OPT code using floating point if the index type is
2640 int instead of long (PR target/81550 and target/84042). */
2643 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2645 && !VECTOR_MODE_P (m2
)
2646 && !VECTOR_ALIGNMENT_P (m2
)
2648 && (m
!= E_DFmode
|| !TARGET_VSX
)
2649 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2650 && !small_int_vsx_p
)
2652 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2654 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2655 we don't allow PRE_MODIFY for some multi-register
2660 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2664 if (TARGET_POWERPC64
)
2665 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2670 if (TARGET_HARD_FLOAT
)
2671 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2677 /* GPR and FPR registers can do REG+OFFSET addressing, except
2678 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2679 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2680 if ((addr_mask
!= 0) && !indexed_only_p
2682 && (rc
== RELOAD_REG_GPR
2683 || ((msize
== 8 || m2
== SFmode
)
2684 && (rc
== RELOAD_REG_FPR
2685 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2686 addr_mask
|= RELOAD_REG_OFFSET
;
2688 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2689 instructions are enabled. The offset for 128-bit VSX registers is
2690 only 12-bits. While GPRs can handle the full offset range, VSX
2691 registers can only handle the restricted range. */
2692 else if ((addr_mask
!= 0) && !indexed_only_p
2693 && msize
== 16 && TARGET_P9_VECTOR
2694 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2695 || (m2
== TImode
&& TARGET_VSX
)))
2697 addr_mask
|= RELOAD_REG_OFFSET
;
2698 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2699 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2702 /* Vector pairs can do both indexed and offset loads if the
2703 instructions are enabled, otherwise they can only do offset loads
2704 since it will be broken into two vector moves. Vector quads can
2705 only do offset loads. */
2706 else if ((addr_mask
!= 0) && TARGET_MMA
2707 && (m2
== OOmode
|| m2
== XOmode
))
2709 addr_mask
|= RELOAD_REG_OFFSET
;
2710 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2712 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2714 addr_mask
|= RELOAD_REG_INDEXED
;
2718 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2719 addressing on 128-bit types. */
2720 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2721 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2722 addr_mask
|= RELOAD_REG_AND_M16
;
2724 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2725 any_addr_mask
|= addr_mask
;
2728 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2733 /* Initialize the various global tables that are based on register size. */
2735 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2741 /* Precalculate REGNO_REG_CLASS. */
2742 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2743 for (r
= 1; r
< 32; ++r
)
2744 rs6000_regno_regclass
[r
] = BASE_REGS
;
2746 for (r
= 32; r
< 64; ++r
)
2747 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2749 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2750 rs6000_regno_regclass
[r
] = NO_REGS
;
2752 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2753 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2755 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2756 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2757 rs6000_regno_regclass
[r
] = CR_REGS
;
2759 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2760 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2761 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2762 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2763 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2764 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2765 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2767 /* Precalculate register class to simpler reload register class. We don't
2768 need all of the register classes that are combinations of different
2769 classes, just the simple ones that have constraint letters. */
2770 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2771 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2773 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2774 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2775 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2776 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2777 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2778 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2779 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2780 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2781 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2782 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2786 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2787 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2791 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2792 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2795 /* Precalculate the valid memory formats as well as the vector information,
2796 this must be set up before the rs6000_hard_regno_nregs_internal calls
2798 gcc_assert ((int)VECTOR_NONE
== 0);
2799 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2800 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2802 gcc_assert ((int)CODE_FOR_nothing
== 0);
2803 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2805 gcc_assert ((int)NO_REGS
== 0);
2806 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2808 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2809 believes it can use native alignment or still uses 128-bit alignment. */
2810 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2821 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2822 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2823 if (TARGET_FLOAT128_TYPE
)
2825 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2826 rs6000_vector_align
[KFmode
] = 128;
2828 if (FLOAT128_IEEE_P (TFmode
))
2830 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2831 rs6000_vector_align
[TFmode
] = 128;
2835 /* V2DF mode, VSX only. */
2838 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2839 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2840 rs6000_vector_align
[V2DFmode
] = align64
;
2843 /* V4SF mode, either VSX or Altivec. */
2846 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2847 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2848 rs6000_vector_align
[V4SFmode
] = align32
;
2850 else if (TARGET_ALTIVEC
)
2852 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2853 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2854 rs6000_vector_align
[V4SFmode
] = align32
;
2857 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2861 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2862 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2863 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2864 rs6000_vector_align
[V4SImode
] = align32
;
2865 rs6000_vector_align
[V8HImode
] = align32
;
2866 rs6000_vector_align
[V16QImode
] = align32
;
2870 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2871 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2872 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2876 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2877 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2878 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2882 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2883 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2886 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2887 rs6000_vector_unit
[V2DImode
]
2888 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2889 rs6000_vector_align
[V2DImode
] = align64
;
2891 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2892 rs6000_vector_unit
[V1TImode
]
2893 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2894 rs6000_vector_align
[V1TImode
] = 128;
2897 /* DFmode, see if we want to use the VSX unit. Memory is handled
2898 differently, so don't set rs6000_vector_mem. */
2901 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2902 rs6000_vector_align
[DFmode
] = 64;
2905 /* SFmode, see if we want to use the VSX unit. */
2906 if (TARGET_P8_VECTOR
)
2908 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2909 rs6000_vector_align
[SFmode
] = 32;
2912 /* Allow TImode in VSX register and set the VSX memory macros. */
2915 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2916 rs6000_vector_align
[TImode
] = align64
;
2919 /* Add support for vector pairs and vector quad registers. */
2922 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
2923 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
2924 rs6000_vector_align
[OOmode
] = 256;
2926 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
2927 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
2928 rs6000_vector_align
[XOmode
] = 512;
2931 /* Register class constraints for the constraints that depend on compile
2932 switches. When the VSX code was added, different constraints were added
2933 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2934 of the VSX registers are used. The register classes for scalar floating
2935 point types is set, based on whether we allow that type into the upper
2936 (Altivec) registers. GCC has register classes to target the Altivec
2937 registers for load/store operations, to select using a VSX memory
2938 operation instead of the traditional floating point operation. The
2941 d - Register class to use with traditional DFmode instructions.
2942 f - Register class to use with traditional SFmode instructions.
2943 v - Altivec register.
2944 wa - Any VSX register.
2945 wc - Reserved to represent individual CR bits (used in LLVM).
2946 wn - always NO_REGS.
2947 wr - GPR if 64-bit mode is permitted.
2948 wx - Float register if we can do 32-bit int stores. */
2950 if (TARGET_HARD_FLOAT
)
2952 rs6000_constraints
[RS6000_CONSTRAINT_f
] = FLOAT_REGS
; /* SFmode */
2953 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
; /* DFmode */
2957 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2959 /* Add conditional constraints based on various options, to allow us to
2960 collapse multiple insn patterns. */
2962 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2964 if (TARGET_POWERPC64
)
2966 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2967 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2971 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2973 /* Support for new direct moves (ISA 3.0 + 64bit). */
2974 if (TARGET_DIRECT_MOVE_128
)
2975 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2977 /* Set up the reload helper and direct move functions. */
2978 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2982 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2983 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2984 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2985 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
2986 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
2987 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
2988 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
2989 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
2990 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
2991 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
2992 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
2993 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
2994 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
2995 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
2996 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
2997 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
2998 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
2999 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3000 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3001 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3003 if (FLOAT128_VECTOR_P (KFmode
))
3005 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3006 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3009 if (FLOAT128_VECTOR_P (TFmode
))
3011 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3012 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3015 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3017 if (TARGET_NO_SDMODE_STACK
)
3019 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3020 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3025 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3026 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3029 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3031 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3032 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3033 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3034 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3035 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3036 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3037 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3038 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3039 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3041 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3042 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3043 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3044 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3045 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3046 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3047 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3048 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3049 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3051 if (FLOAT128_VECTOR_P (KFmode
))
3053 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3054 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3057 if (FLOAT128_VECTOR_P (TFmode
))
3059 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3060 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3065 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3066 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3067 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3068 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3074 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3075 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3076 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3077 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3078 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3079 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3080 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3081 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3082 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3083 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3084 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3085 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3086 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3087 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3088 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3089 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3090 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3091 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3092 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3093 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3095 if (FLOAT128_VECTOR_P (KFmode
))
3097 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3098 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3101 if (FLOAT128_IEEE_P (TFmode
))
3103 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3104 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3107 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3109 if (TARGET_NO_SDMODE_STACK
)
3111 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3112 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3117 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3118 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3121 if (TARGET_DIRECT_MOVE
)
3123 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3124 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3125 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3129 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3130 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3132 if (TARGET_P8_VECTOR
)
3134 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3135 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3137 if (TARGET_P9_VECTOR
)
3139 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3140 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3145 /* Precalculate HARD_REGNO_NREGS. */
3146 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3147 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3148 rs6000_hard_regno_nregs
[m
][r
]
3149 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3151 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3152 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3153 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3154 rs6000_hard_regno_mode_ok_p
[m
][r
]
3155 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3157 /* Precalculate CLASS_MAX_NREGS sizes. */
3158 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3162 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3163 reg_size
= UNITS_PER_VSX_WORD
;
3165 else if (c
== ALTIVEC_REGS
)
3166 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3168 else if (c
== FLOAT_REGS
)
3169 reg_size
= UNITS_PER_FP_WORD
;
3172 reg_size
= UNITS_PER_WORD
;
3174 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3176 machine_mode m2
= (machine_mode
)m
;
3177 int reg_size2
= reg_size
;
3179 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3181 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3182 reg_size2
= UNITS_PER_FP_WORD
;
3184 rs6000_class_max_nregs
[m
][c
]
3185 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3189 /* Calculate which modes to automatically generate code to use a the
3190 reciprocal divide and square root instructions. In the future, possibly
3191 automatically generate the instructions even if the user did not specify
3192 -mrecip. The older machines double precision reciprocal sqrt estimate is
3193 not accurate enough. */
3194 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3196 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3198 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3199 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3200 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3201 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3202 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3204 if (TARGET_FRSQRTES
)
3205 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3207 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3208 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3209 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3210 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3211 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3213 if (rs6000_recip_control
)
3215 if (!flag_finite_math_only
)
3216 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3218 if (flag_trapping_math
)
3219 warning (0, "%qs requires %qs or %qs", "-mrecip",
3220 "-fno-trapping-math", "-ffast-math");
3221 if (!flag_reciprocal_math
)
3222 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3224 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3226 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3227 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3228 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3230 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3231 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3232 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3234 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3235 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3236 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3238 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3239 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3240 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3243 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3244 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3247 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3248 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3251 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3252 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3254 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3255 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3256 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3260 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3261 legitimate address support to figure out the appropriate addressing to
3263 rs6000_setup_reg_addr_masks ();
3265 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3267 if (TARGET_DEBUG_REG
)
3268 rs6000_debug_reg_global ();
3270 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3272 "SImode variable mult cost = %d\n"
3273 "SImode constant mult cost = %d\n"
3274 "SImode short constant mult cost = %d\n"
3275 "DImode multipliciation cost = %d\n"
3276 "SImode division cost = %d\n"
3277 "DImode division cost = %d\n"
3278 "Simple fp operation cost = %d\n"
3279 "DFmode multiplication cost = %d\n"
3280 "SFmode division cost = %d\n"
3281 "DFmode division cost = %d\n"
3282 "cache line size = %d\n"
3283 "l1 cache size = %d\n"
3284 "l2 cache size = %d\n"
3285 "simultaneous prefetches = %d\n"
3288 rs6000_cost
->mulsi_const
,
3289 rs6000_cost
->mulsi_const9
,
3297 rs6000_cost
->cache_line_size
,
3298 rs6000_cost
->l1_cache_size
,
3299 rs6000_cost
->l2_cache_size
,
3300 rs6000_cost
->simultaneous_prefetches
);
3305 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3308 darwin_rs6000_override_options (void)
3310 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3312 rs6000_altivec_abi
= 1;
3313 TARGET_ALTIVEC_VRSAVE
= 1;
3314 rs6000_current_abi
= ABI_DARWIN
;
3316 if (DEFAULT_ABI
== ABI_DARWIN
3318 darwin_one_byte_bool
= 1;
3320 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3322 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3323 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3326 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3327 optimisation, and will not work with the most generic case (where the
3328 symbol is undefined external, but there is no symbl stub). */
3330 rs6000_default_long_calls
= 0;
3332 /* ld_classic is (so far) still used for kernel (static) code, and supports
3333 the JBSR longcall / branch islands. */
3336 rs6000_default_long_calls
= 1;
3338 /* Allow a kext author to do -mkernel -mhard-float. */
3339 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3340 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3343 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3345 if (!flag_mkernel
&& !flag_apple_kext
3347 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3348 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3350 /* Unless the user (not the configurer) has explicitly overridden
3351 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3352 G4 unless targeting the kernel. */
3355 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3356 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3357 && ! global_options_set
.x_rs6000_cpu_index
)
3359 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3364 /* If not otherwise specified by a target, make 'long double' equivalent to
3367 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3368 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3371 /* Return the builtin mask of the various options used that could affect which
3372 builtins were used. In the past we used target_flags, but we've run out of
3373 bits, and some options are no longer in target_flags. */
3376 rs6000_builtin_mask_calculate (void)
3378 return (((TARGET_ALTIVEC
) ? RS6000_BTM_ALTIVEC
: 0)
3379 | ((TARGET_CMPB
) ? RS6000_BTM_CMPB
: 0)
3380 | ((TARGET_VSX
) ? RS6000_BTM_VSX
: 0)
3381 | ((TARGET_FRE
) ? RS6000_BTM_FRE
: 0)
3382 | ((TARGET_FRES
) ? RS6000_BTM_FRES
: 0)
3383 | ((TARGET_FRSQRTE
) ? RS6000_BTM_FRSQRTE
: 0)
3384 | ((TARGET_FRSQRTES
) ? RS6000_BTM_FRSQRTES
: 0)
3385 | ((TARGET_POPCNTD
) ? RS6000_BTM_POPCNTD
: 0)
3386 | ((rs6000_cpu
== PROCESSOR_CELL
) ? RS6000_BTM_CELL
: 0)
3387 | ((TARGET_P8_VECTOR
) ? RS6000_BTM_P8_VECTOR
: 0)
3388 | ((TARGET_P9_VECTOR
) ? RS6000_BTM_P9_VECTOR
: 0)
3389 | ((TARGET_P9_MISC
) ? RS6000_BTM_P9_MISC
: 0)
3390 | ((TARGET_MODULO
) ? RS6000_BTM_MODULO
: 0)
3391 | ((TARGET_64BIT
) ? RS6000_BTM_64BIT
: 0)
3392 | ((TARGET_POWERPC64
) ? RS6000_BTM_POWERPC64
: 0)
3393 | ((TARGET_CRYPTO
) ? RS6000_BTM_CRYPTO
: 0)
3394 | ((TARGET_HTM
) ? RS6000_BTM_HTM
: 0)
3395 | ((TARGET_DFP
) ? RS6000_BTM_DFP
: 0)
3396 | ((TARGET_HARD_FLOAT
) ? RS6000_BTM_HARD_FLOAT
: 0)
3397 | ((TARGET_LONG_DOUBLE_128
3398 && TARGET_HARD_FLOAT
3399 && !TARGET_IEEEQUAD
) ? RS6000_BTM_LDBL128
: 0)
3400 | ((TARGET_FLOAT128_TYPE
) ? RS6000_BTM_FLOAT128
: 0)
3401 | ((TARGET_FLOAT128_HW
) ? RS6000_BTM_FLOAT128_HW
: 0)
3402 | ((TARGET_MMA
) ? RS6000_BTM_MMA
: 0)
3403 | ((TARGET_POWER10
) ? RS6000_BTM_P10
: 0));
3406 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3407 to clobber the XER[CA] bit because clobbering that bit without telling
3408 the compiler worked just fine with versions of GCC before GCC 5, and
3409 breaking a lot of older code in ways that are hard to track down is
3410 not such a great idea. */
3413 rs6000_md_asm_adjust (vec
<rtx
> &/*outputs*/, vec
<rtx
> &/*inputs*/,
3414 vec
<const char *> &/*constraints*/,
3415 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
3417 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3418 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3422 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3423 but is called when the optimize level is changed via an attribute or
3424 pragma or when it is reset at the end of the code affected by the
3425 attribute or pragma. It is not called at the beginning of compilation
3426 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3427 actions then, you should have TARGET_OPTION_OVERRIDE call
3428 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3431 rs6000_override_options_after_change (void)
3433 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3434 turns -frename-registers on. */
3435 if ((global_options_set
.x_flag_unroll_loops
&& flag_unroll_loops
)
3436 || (global_options_set
.x_flag_unroll_all_loops
3437 && flag_unroll_all_loops
))
3439 if (!global_options_set
.x_unroll_only_small_loops
)
3440 unroll_only_small_loops
= 0;
3441 if (!global_options_set
.x_flag_rename_registers
)
3442 flag_rename_registers
= 1;
3443 if (!global_options_set
.x_flag_cunroll_grow_size
)
3444 flag_cunroll_grow_size
= 1;
3446 else if (!global_options_set
.x_flag_cunroll_grow_size
)
3447 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
#ifdef TARGET_USES_LINUX64_OPT
/* On 64-bit Linux, validate and adjust the option flags: reject options
   that are incompatible with the selected 32/64-bit mode and pick a code
   model.  INVALID_64BIT / INVALID_32BIT are diagnostic format strings
   provided by the linux64 target headers.  */
static void
rs6000_linux64_override_options ()
{
  if (!global_options_set.x_rs6000_alignment_flags)
    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
  if (rs6000_isa_flags & OPTION_MASK_64BIT)
    {
      /* 64-bit mode: force the AIX-style calling convention.  */
      if (DEFAULT_ABI != ABI_AIX)
	{
	  rs6000_current_abi = ABI_AIX;
	  error (INVALID_64BIT, "call");
	}
      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
      if (ELFv2_ABI_CHECK)
	{
	  rs6000_current_abi = ABI_ELFv2;
	  if (dot_symbols)
	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
	}
      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
	  error (INVALID_64BIT, "relocatable");
	}
      if (rs6000_isa_flags & OPTION_MASK_EABI)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
	  error (INVALID_64BIT, "eabi");
	}
      if (TARGET_PROTOTYPE)
	{
	  target_prototype = 0;
	  error (INVALID_64BIT, "prototype");
	}
      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
	  error ("%<-m64%> requires a PowerPC64 cpu");
	}
      /* Default to the medium code model unless the user chose one.  */
      if (!global_options_set.x_rs6000_current_cmodel)
	SET_CMODEL (CMODEL_MEDIUM);
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
	{
	  if (global_options_set.x_rs6000_current_cmodel
	      && rs6000_current_cmodel != CMODEL_SMALL)
	    error ("%<-mcmodel incompatible with other toc options%>");
	  if (TARGET_MINIMAL_TOC)
	    SET_CMODEL (CMODEL_SMALL);
	  else if (TARGET_PCREL
		   || (PCREL_SUPPORTED_BY_OS
		       && (rs6000_isa_flags_explicit
			   & OPTION_MASK_PCREL) == 0))
	    /* Ignore -mno-minimal-toc.  */
	    ;
	  else
	    SET_CMODEL (CMODEL_SMALL);
	}
      if (rs6000_current_cmodel != CMODEL_SMALL)
	{
	  if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
	  if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
	    TARGET_NO_SUM_IN_TOC = 0;
	}
      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
	{
	  if (global_options_set.x_rs6000_pltseq)
	    warning (0, "%qs unsupported for this ABI",
		     "-mpltseq");
	  rs6000_pltseq = false;
	}
    }
  else if (TARGET_64BIT)
    error (INVALID_32BIT, "32");
  else
    {
      /* 32-bit mode: these options require 64-bit support.  */
      if (TARGET_PROFILE_KERNEL)
	{
	  TARGET_PROFILE_KERNEL = 0;
	  error (INVALID_32BIT, "profile-kernel");
	}
      if (global_options_set.x_rs6000_current_cmodel)
	{
	  SET_CMODEL (CMODEL_SMALL);
	  error (INVALID_32BIT, "cmodel");
	}
    }
}
#endif
/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
   This support is only in little endian GLIBC 2.32 or newer.  */
static bool
glibc_supports_ieee_128bit (void)
{
#ifdef OPTION_GLIBC
  /* Encode major.minor as major*1000+minor so 2.32 becomes 2032.  */
  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
    return true;
#endif /* OPTION_GLIBC.  */
  return false;
}
3554 /* Override command line options.
3556 Combine build-specific configuration information with options
3557 specified on the command line to set various state variables which
3558 influence code generation, optimization, and expansion of built-in
3559 functions. Assure that command-line configuration preferences are
3560 compatible with each other and with the build configuration; issue
3561 warnings while adjusting configuration or error messages while
3562 rejecting configuration.
3564 Upon entry to this function:
3566 This function is called once at the beginning of
3567 compilation, and then again at the start and end of compiling
3568 each section of code that has a different configuration, as
3569 indicated, for example, by adding the
3571 __attribute__((__target__("cpu=power9")))
3573 qualifier to a function definition or, for example, by bracketing
3576 #pragma GCC target("altivec")
3580 #pragma GCC reset_options
3582 directives. Parameter global_init_p is true for the initial
3583 invocation, which initializes global variables, and false for all
3584 subsequent invocations.
3587 Various global state information is assumed to be valid. This
3588 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3589 default CPU specified at build configure time, TARGET_DEFAULT,
3590 representing the default set of option flags for the default
3591 target, and global_options_set.x_rs6000_isa_flags, representing
3592 which options were requested on the command line.
3594 Upon return from this function:
3596 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3597 was set by name on the command line. Additionally, if certain
3598 attributes are automatically enabled or disabled by this function
3599 in order to assure compatibility between options and
3600 configuration, the flags associated with those attributes are
3601 also set. By setting these "explicit bits", we avoid the risk
3602 that other code might accidentally overwrite these particular
3603 attributes with "default values".
3605 The various bits of rs6000_isa_flags are set to indicate the
3606 target options that have been selected for the most current
3607 compilation efforts. This has the effect of also turning on the
3608 associated TARGET_XXX values since these are macros which are
3609 generally defined to test the corresponding bit of the
3610 rs6000_isa_flags variable.
3612 The variable rs6000_builtin_mask is set to represent the target
3613 options for the most current compilation efforts, consistent with
3614 the current contents of rs6000_isa_flags. This variable controls
3615 expansion of built-in functions.
3617 Various other global variables and fields of global structures
3618 (over 50 in all) are initialized to reflect the desired options
3619 for the most current compilation efforts. */
3622 rs6000_option_override_internal (bool global_init_p
)
3626 HOST_WIDE_INT set_masks
;
3627 HOST_WIDE_INT ignore_masks
;
3630 struct cl_target_option
*main_target_opt
3631 = ((global_init_p
|| target_option_default_node
== NULL
)
3632 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3634 /* Print defaults. */
3635 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3636 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3638 /* Remember the explicit arguments. */
3640 rs6000_isa_flags_explicit
= global_options_set
.x_rs6000_isa_flags
;
3642 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3643 library functions, so warn about it. The flag may be useful for
3644 performance studies from time to time though, so don't disable it
3646 if (global_options_set
.x_rs6000_alignment_flags
3647 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3648 && DEFAULT_ABI
== ABI_DARWIN
3650 warning (0, "%qs is not supported for 64-bit Darwin;"
3651 " it is incompatible with the installed C and C++ libraries",
3654 /* Numerous experiment shows that IRA based loop pressure
3655 calculation works better for RTL loop invariant motion on targets
3656 with enough (>= 32) registers. It is an expensive optimization.
3657 So it is on only for peak performance. */
3658 if (optimize
>= 3 && global_init_p
3659 && !global_options_set
.x_flag_ira_loop_pressure
)
3660 flag_ira_loop_pressure
= 1;
3662 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3663 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3664 options were already specified. */
3665 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3666 && !global_options_set
.x_flag_asynchronous_unwind_tables
)
3667 flag_asynchronous_unwind_tables
= 1;
3669 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3670 loop unroller is active. It is only checked during unrolling, so
3671 we can just set it on by default. */
3672 if (!global_options_set
.x_flag_variable_expansion_in_unroller
)
3673 flag_variable_expansion_in_unroller
= 1;
3675 /* Set the pointer size. */
3678 rs6000_pmode
= DImode
;
3679 rs6000_pointer_size
= 64;
3683 rs6000_pmode
= SImode
;
3684 rs6000_pointer_size
= 32;
3687 /* Some OSs don't support saving the high part of 64-bit registers on context
3688 switch. Other OSs don't support saving Altivec registers. On those OSs,
3689 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3690 if the user wants either, the user must explicitly specify them and we
3691 won't interfere with the user's specification. */
3693 set_masks
= POWERPC_MASKS
;
3694 #ifdef OS_MISSING_POWERPC64
3695 if (OS_MISSING_POWERPC64
)
3696 set_masks
&= ~OPTION_MASK_POWERPC64
;
3698 #ifdef OS_MISSING_ALTIVEC
3699 if (OS_MISSING_ALTIVEC
)
3700 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3701 | OTHER_VSX_VECTOR_MASKS
);
3704 /* Don't override by the processor default if given explicitly. */
3705 set_masks
&= ~rs6000_isa_flags_explicit
;
3707 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3708 the cpu in a target attribute or pragma, but did not specify a tuning
3709 option, use the cpu for the tuning option rather than the option specified
3710 with -mtune on the command line. Process a '--with-cpu' configuration
3711 request as an implicit --cpu. */
3712 if (rs6000_cpu_index
>= 0)
3713 cpu_index
= rs6000_cpu_index
;
3714 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
3715 cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
3716 else if (OPTION_TARGET_CPU_DEFAULT
)
3717 cpu_index
= rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT
);
3719 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3720 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3721 with those from the cpu, except for options that were explicitly set. If
3722 we don't have a cpu, do not override the target bits set in
3726 rs6000_cpu_index
= cpu_index
;
3727 rs6000_isa_flags
&= ~set_masks
;
3728 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
3733 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3734 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3735 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3736 to using rs6000_isa_flags, we need to do the initialization here.
3738 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3739 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3740 HOST_WIDE_INT flags
;
3742 flags
= TARGET_DEFAULT
;
3745 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3746 const char *default_cpu
= (!TARGET_POWERPC64
3751 int default_cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
3752 flags
= processor_target_table
[default_cpu_index
].target_enable
;
3754 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
3757 if (rs6000_tune_index
>= 0)
3758 tune_index
= rs6000_tune_index
;
3759 else if (cpu_index
>= 0)
3760 rs6000_tune_index
= tune_index
= cpu_index
;
3764 enum processor_type tune_proc
3765 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3768 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3769 if (processor_target_table
[i
].processor
== tune_proc
)
3777 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3779 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3781 gcc_assert (tune_index
>= 0);
3782 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3784 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3785 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3786 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3789 error ("AltiVec not supported in this target");
3792 /* If we are optimizing big endian systems for space, use the load/store
3793 multiple instructions. */
3794 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3795 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3797 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3798 because the hardware doesn't support the instructions used in little
3799 endian mode, and causes an alignment trap. The 750 does not cause an
3800 alignment trap (except when the target is unaligned). */
3802 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3804 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3805 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3806 warning (0, "%qs is not supported on little endian systems",
3810 /* If little-endian, default to -mstrict-align on older processors.
3811 Testing for direct_move matches power8 and later. */
3812 if (!BYTES_BIG_ENDIAN
3813 && !(processor_target_table
[tune_index
].target_enable
3814 & OPTION_MASK_DIRECT_MOVE
))
3815 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3817 if (!rs6000_fold_gimple
)
3819 "gimple folding of rs6000 builtins has been disabled.\n");
3821 /* Add some warnings for VSX. */
3824 const char *msg
= NULL
;
3825 if (!TARGET_HARD_FLOAT
)
3827 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3828 msg
= N_("%<-mvsx%> requires hardware floating point");
3831 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3832 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3835 else if (TARGET_AVOID_XFORM
> 0)
3836 msg
= N_("%<-mvsx%> needs indexed addressing");
3837 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
3838 & OPTION_MASK_ALTIVEC
))
3840 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3841 msg
= N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3843 msg
= N_("%<-mno-altivec%> disables vsx");
3849 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3850 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3854 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3855 the -mcpu setting to enable options that conflict. */
3856 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3857 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3858 | OPTION_MASK_ALTIVEC
3859 | OPTION_MASK_VSX
)) != 0)
3860 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
3861 | OPTION_MASK_DIRECT_MOVE
)
3862 & ~rs6000_isa_flags_explicit
);
3864 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3865 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3867 #ifdef XCOFF_DEBUGGING_INFO
3868 /* For AIX default to 64-bit DWARF. */
3869 if (!global_options_set
.x_dwarf_offset_size
)
3870 dwarf_offset_size
= POINTER_SIZE_UNITS
;
3873 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3874 off all of the options that depend on those flags. */
3875 ignore_masks
= rs6000_disable_incompatible_switches ();
3877 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3878 unless the user explicitly used the -mno-<option> to disable the code. */
3879 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3880 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3881 else if (TARGET_P9_MINMAX
)
3885 if (cpu_index
== PROCESSOR_POWER9
)
3887 /* legacy behavior: allow -mcpu=power9 with certain
3888 capabilities explicitly disabled. */
3889 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3892 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3893 "for <xxx> less than power9", "-mcpu");
3895 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3896 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3897 & rs6000_isa_flags_explicit
))
3898 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3899 were explicitly cleared. */
3900 error ("%qs incompatible with explicitly disabled options",
3903 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3905 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
3906 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3907 else if (TARGET_VSX
)
3908 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3909 else if (TARGET_POPCNTD
)
3910 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3911 else if (TARGET_DFP
)
3912 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3913 else if (TARGET_CMPB
)
3914 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3915 else if (TARGET_FPRND
)
3916 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3917 else if (TARGET_POPCNTB
)
3918 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3919 else if (TARGET_ALTIVEC
)
3920 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3922 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3924 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3925 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3926 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3929 if (!TARGET_FPRND
&& TARGET_VSX
)
3931 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
3932 /* TARGET_VSX = 1 implies Power 7 and newer */
3933 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3934 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
3937 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
3939 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
3940 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3941 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
3944 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
3946 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3947 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3948 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3951 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3953 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3954 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
3955 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3956 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
3958 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3959 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3960 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3964 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3966 rs6000_isa_flags
|= OPTION_MASK_VSX
;
3967 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3971 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3973 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3974 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3975 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
3978 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3979 silently turn off quad memory mode. */
3980 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3982 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3983 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3985 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
3986 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3988 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
3989 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
3992 /* Non-atomic quad memory load/store are disabled for little endian, since
3993 the words are reversed, but atomic operations can still be done by
3994 swapping the words. */
3995 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
3997 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3998 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4001 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
4004 /* Assume if the user asked for normal quad memory instructions, they want
4005 the atomic versions as well, unless they explicity told us not to use quad
4006 word atomic instructions. */
4007 if (TARGET_QUAD_MEMORY
4008 && !TARGET_QUAD_MEMORY_ATOMIC
4009 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4010 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4012 /* If we can shrink-wrap the TOC register save separately, then use
4013 -msave-toc-indirect unless explicitly disabled. */
4014 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4015 && flag_shrink_wrap_separate
4016 && optimize_function_for_speed_p (cfun
))
4017 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4019 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4020 generating power8 instructions. Power9 does not optimize power8 fusion
4022 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4024 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4025 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4027 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4030 /* Setting additional fusion flags turns on base fusion. */
4031 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4033 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4035 if (TARGET_P8_FUSION_SIGN
)
4036 error ("%qs requires %qs", "-mpower8-fusion-sign",
4039 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4042 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4045 /* Power8 does not fuse sign extended loads with the addis. If we are
4046 optimizing at high levels for speed, convert a sign extended load into a
4047 zero extending load, and an explicit sign extension. */
4048 if (TARGET_P8_FUSION
4049 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4050 && optimize_function_for_speed_p (cfun
)
4052 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4054 /* ISA 3.0 vector instructions include ISA 2.07. */
4055 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4057 /* We prefer to not mention undocumented options in
4058 error messages. However, if users have managed to select
4059 power9-vector without selecting power8-vector, they
4060 already know about undocumented flags. */
4061 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4062 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4063 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4064 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4066 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4067 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4068 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4072 /* OPTION_MASK_P9_VECTOR is explicit and
4073 OPTION_MASK_P8_VECTOR is not explicit. */
4074 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4075 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4079 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4080 support. If we only have ISA 2.06 support, and the user did not specify
4081 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4082 but we don't enable the full vectorization support */
4083 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4084 TARGET_ALLOW_MOVMISALIGN
= 1;
4086 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4088 if (TARGET_ALLOW_MOVMISALIGN
> 0
4089 && global_options_set
.x_TARGET_ALLOW_MOVMISALIGN
)
4090 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4092 TARGET_ALLOW_MOVMISALIGN
= 0;
4095 /* Determine when unaligned vector accesses are permitted, and when
4096 they are preferred over masked Altivec loads. Note that if
4097 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4098 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4100 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4104 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4105 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4107 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4110 else if (!TARGET_ALLOW_MOVMISALIGN
)
4112 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4113 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4114 "-mallow-movmisalign");
4116 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4120 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4122 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4123 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4125 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4128 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
))
4130 if (TARGET_MMA
&& TARGET_EFFICIENT_UNALIGNED_VSX
)
4131 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
;
4133 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
;
4136 /* Use long double size to select the appropriate long double. We use
4137 TYPE_PRECISION to differentiate the 3 different long double types. We map
4138 128 into the precision used for TFmode. */
4139 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4141 : FLOAT_PRECISION_TFmode
);
4143 /* Set long double size before the IEEE 128-bit tests. */
4144 if (!global_options_set
.x_rs6000_long_double_type_size
)
4146 if (main_target_opt
!= NULL
4147 && (main_target_opt
->x_rs6000_long_double_type_size
4148 != default_long_double_size
))
4149 error ("target attribute or pragma changes %<long double%> size");
4151 rs6000_long_double_type_size
= default_long_double_size
;
4153 else if (rs6000_long_double_type_size
== 128)
4154 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
4155 else if (global_options_set
.x_rs6000_ieeequad
)
4157 if (global_options
.x_rs6000_ieeequad
)
4158 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4160 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4163 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4164 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4165 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4166 those systems will not pick up this default. Warn if the user changes the
4167 default unless -Wno-psabi. */
4168 if (!global_options_set
.x_rs6000_ieeequad
)
4169 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4173 if (global_options
.x_rs6000_ieeequad
4174 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4175 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4177 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
&& TARGET_LONG_DOUBLE_128
)
4179 /* Determine if the user can change the default long double type at
4180 compilation time. Only C and C++ support this, and you need GLIBC
4181 2.32 or newer. Only issue one warning. */
4182 static bool warned_change_long_double
;
4184 if (!warned_change_long_double
4185 && (!glibc_supports_ieee_128bit ()
4186 || (!lang_GNU_C () && !lang_GNU_CXX ())))
4188 warned_change_long_double
= true;
4189 if (TARGET_IEEEQUAD
)
4190 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4193 warning (OPT_Wpsabi
, "Using IBM extended precision "
4199 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4200 sytems. In GCC 7, we would enable the IEEE 128-bit floating point
4201 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4202 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4203 the keyword as well as the type. */
4204 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
4206 /* IEEE 128-bit floating point requires VSX support. */
4207 if (TARGET_FLOAT128_KEYWORD
)
4211 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4212 error ("%qs requires VSX support", "-mfloat128");
4214 TARGET_FLOAT128_TYPE
= 0;
4215 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4216 | OPTION_MASK_FLOAT128_HW
);
4218 else if (!TARGET_FLOAT128_TYPE
)
4220 TARGET_FLOAT128_TYPE
= 1;
4221 warning (0, "The %<-mfloat128%> option may not be fully supported");
4225 /* Enable the __float128 keyword under Linux by default. */
4226 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4227 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4228 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4230 /* If we have are supporting the float128 type and full ISA 3.0 support,
4231 enable -mfloat128-hardware by default. However, don't enable the
4232 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4233 because sometimes the compiler wants to put things in an integer
4234 container, and if we don't have __int128 support, it is impossible. */
4235 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4236 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4237 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4238 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4240 if (TARGET_FLOAT128_HW
4241 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4243 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4244 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4246 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4249 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4251 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4252 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4254 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4257 /* Enable -mprefixed by default on power10 systems. */
4258 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4259 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4261 /* -mprefixed requires -mcpu=power10 (or later). */
4262 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4264 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4265 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4267 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4270 /* -mpcrel requires prefixed load/store addressing. */
4271 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4273 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4274 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4276 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4279 /* Print the options after updating the defaults. */
4280 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4281 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4283 /* E500mc does "better" if we inline more aggressively. Respect the
4284 user's opinion, though. */
4285 if (rs6000_block_move_inline_limit
== 0
4286 && (rs6000_tune
== PROCESSOR_PPCE500MC
4287 || rs6000_tune
== PROCESSOR_PPCE500MC64
4288 || rs6000_tune
== PROCESSOR_PPCE5500
4289 || rs6000_tune
== PROCESSOR_PPCE6500
))
4290 rs6000_block_move_inline_limit
= 128;
4292 /* store_one_arg depends on expand_block_move to handle at least the
4293 size of reg_parm_stack_space. */
4294 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4295 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4299 /* If the appropriate debug option is enabled, replace the target hooks
4300 with debug versions that call the real version and then prints
4301 debugging information. */
4302 if (TARGET_DEBUG_COST
)
4304 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4305 targetm
.address_cost
= rs6000_debug_address_cost
;
4306 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4309 if (TARGET_DEBUG_ADDR
)
4311 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4312 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4313 rs6000_secondary_reload_class_ptr
4314 = rs6000_debug_secondary_reload_class
;
4315 targetm
.secondary_memory_needed
4316 = rs6000_debug_secondary_memory_needed
;
4317 targetm
.can_change_mode_class
4318 = rs6000_debug_can_change_mode_class
;
4319 rs6000_preferred_reload_class_ptr
4320 = rs6000_debug_preferred_reload_class
;
4321 rs6000_mode_dependent_address_ptr
4322 = rs6000_debug_mode_dependent_address
;
4325 if (rs6000_veclibabi_name
)
4327 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4328 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4331 error ("unknown vectorization library ABI type (%qs) for "
4332 "%qs switch", rs6000_veclibabi_name
, "-mveclibabi=");
4338 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4339 target attribute or pragma which automatically enables both options,
4340 unless the altivec ABI was set. This is set by default for 64-bit, but
4342 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4344 TARGET_FLOAT128_TYPE
= 0;
4345 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
4346 | OPTION_MASK_FLOAT128_KEYWORD
)
4347 & ~rs6000_isa_flags_explicit
);
4350 /* Enable Altivec ABI for AIX -maltivec. */
4352 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4353 && !global_options_set
.x_rs6000_altivec_abi
)
4355 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4356 error ("target attribute or pragma changes AltiVec ABI");
4358 rs6000_altivec_abi
= 1;
4361 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4362 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4363 be explicitly overridden in either case. */
4366 if (!global_options_set
.x_rs6000_altivec_abi
4367 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4369 if (main_target_opt
!= NULL
&&
4370 !main_target_opt
->x_rs6000_altivec_abi
)
4371 error ("target attribute or pragma changes AltiVec ABI");
4373 rs6000_altivec_abi
= 1;
4377 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4378 So far, the only darwin64 targets are also MACH-O. */
4380 && DEFAULT_ABI
== ABI_DARWIN
4383 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4384 error ("target attribute or pragma changes darwin64 ABI");
4387 rs6000_darwin64_abi
= 1;
4388 /* Default to natural alignment, for better performance. */
4389 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4393 /* Place FP constants in the constant pool instead of TOC
4394 if section anchors enabled. */
4395 if (flag_section_anchors
4396 && !global_options_set
.x_TARGET_NO_FP_IN_TOC
)
4397 TARGET_NO_FP_IN_TOC
= 1;
4399 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4400 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4402 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4403 SUBTARGET_OVERRIDE_OPTIONS
;
4405 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4406 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4408 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4409 SUB3TARGET_OVERRIDE_OPTIONS
;
4412 /* If the ABI has support for PC-relative relocations, enable it by default.
4413 This test depends on the sub-target tests above setting the code model to
4414 medium for ELF v2 systems. */
4415 if (PCREL_SUPPORTED_BY_OS
4416 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4417 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4419 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4420 after the subtarget override options are done. */
4421 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4423 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4424 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4426 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4429 /* Enable -mmma by default on power10 systems. */
4430 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4431 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4433 /* Turn off vector pair/mma options on non-power10 systems. */
4434 else if (!TARGET_POWER10
&& TARGET_MMA
)
4436 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4437 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4439 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4442 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4443 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4445 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4446 && rs6000_tune
!= PROCESSOR_POWER5
4447 && rs6000_tune
!= PROCESSOR_POWER6
4448 && rs6000_tune
!= PROCESSOR_POWER7
4449 && rs6000_tune
!= PROCESSOR_POWER8
4450 && rs6000_tune
!= PROCESSOR_POWER9
4451 && rs6000_tune
!= PROCESSOR_POWER10
4452 && rs6000_tune
!= PROCESSOR_PPCA2
4453 && rs6000_tune
!= PROCESSOR_CELL
4454 && rs6000_tune
!= PROCESSOR_PPC476
);
4455 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4456 || rs6000_tune
== PROCESSOR_POWER5
4457 || rs6000_tune
== PROCESSOR_POWER7
4458 || rs6000_tune
== PROCESSOR_POWER8
);
4459 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4460 || rs6000_tune
== PROCESSOR_POWER5
4461 || rs6000_tune
== PROCESSOR_POWER6
4462 || rs6000_tune
== PROCESSOR_POWER7
4463 || rs6000_tune
== PROCESSOR_POWER8
4464 || rs6000_tune
== PROCESSOR_POWER9
4465 || rs6000_tune
== PROCESSOR_POWER10
4466 || rs6000_tune
== PROCESSOR_PPCE500MC
4467 || rs6000_tune
== PROCESSOR_PPCE500MC64
4468 || rs6000_tune
== PROCESSOR_PPCE5500
4469 || rs6000_tune
== PROCESSOR_PPCE6500
);
4471 /* Allow debug switches to override the above settings. These are set to -1
4472 in rs6000.opt to indicate the user hasn't directly set the switch. */
4473 if (TARGET_ALWAYS_HINT
>= 0)
4474 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4476 if (TARGET_SCHED_GROUPS
>= 0)
4477 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4479 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4480 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4482 rs6000_sched_restricted_insns_priority
4483 = (rs6000_sched_groups
? 1 : 0);
4485 /* Handle -msched-costly-dep option. */
4486 rs6000_sched_costly_dep
4487 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4489 if (rs6000_sched_costly_dep_str
)
4491 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4492 rs6000_sched_costly_dep
= no_dep_costly
;
4493 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4494 rs6000_sched_costly_dep
= all_deps_costly
;
4495 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4496 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4497 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4498 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4500 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4501 atoi (rs6000_sched_costly_dep_str
));
4504 /* Handle -minsert-sched-nops option. */
4505 rs6000_sched_insert_nops
4506 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4508 if (rs6000_sched_insert_nops_str
)
4510 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4511 rs6000_sched_insert_nops
= sched_finish_none
;
4512 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4513 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4514 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4515 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4517 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4518 atoi (rs6000_sched_insert_nops_str
));
4521 /* Handle stack protector */
4522 if (!global_options_set
.x_rs6000_stack_protector_guard
)
4523 #ifdef TARGET_THREAD_SSP_OFFSET
4524 rs6000_stack_protector_guard
= SSP_TLS
;
4526 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4529 #ifdef TARGET_THREAD_SSP_OFFSET
4530 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4531 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4534 if (global_options_set
.x_rs6000_stack_protector_guard_offset_str
)
4537 const char *str
= rs6000_stack_protector_guard_offset_str
;
4540 long offset
= strtol (str
, &endp
, 0);
4541 if (!*str
|| *endp
|| errno
)
4542 error ("%qs is not a valid number in %qs", str
,
4543 "-mstack-protector-guard-offset=");
4545 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4546 || (TARGET_64BIT
&& (offset
& 3)))
4547 error ("%qs is not a valid offset in %qs", str
,
4548 "-mstack-protector-guard-offset=");
4550 rs6000_stack_protector_guard_offset
= offset
;
4553 if (global_options_set
.x_rs6000_stack_protector_guard_reg_str
)
4555 const char *str
= rs6000_stack_protector_guard_reg_str
;
4556 int reg
= decode_reg_name (str
);
4558 if (!IN_RANGE (reg
, 1, 31))
4559 error ("%qs is not a valid base register in %qs", str
,
4560 "-mstack-protector-guard-reg=");
4562 rs6000_stack_protector_guard_reg
= reg
;
4565 if (rs6000_stack_protector_guard
== SSP_TLS
4566 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4567 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4571 #ifdef TARGET_REGNAMES
4572 /* If the user desires alternate register names, copy in the
4573 alternate names now. */
4574 if (TARGET_REGNAMES
)
4575 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4578 /* Set aix_struct_return last, after the ABI is determined.
4579 If -maix-struct-return or -msvr4-struct-return was explicitly
4580 used, don't override with the ABI default. */
4581 if (!global_options_set
.x_aix_struct_return
)
4582 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4585 /* IBM XL compiler defaults to unsigned bitfields. */
4586 if (TARGET_XL_COMPAT
)
4587 flag_signed_bitfields
= 0;
4590 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4591 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4593 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4595 /* We can only guarantee the availability of DI pseudo-ops when
4596 assembling for 64-bit targets. */
4599 targetm
.asm_out
.aligned_op
.di
= NULL
;
4600 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4604 /* Set branch target alignment, if not optimizing for size. */
4607 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4608 aligned 8byte to avoid misprediction by the branch predictor. */
4609 if (rs6000_tune
== PROCESSOR_TITAN
4610 || rs6000_tune
== PROCESSOR_CELL
)
4612 if (flag_align_functions
&& !str_align_functions
)
4613 str_align_functions
= "8";
4614 if (flag_align_jumps
&& !str_align_jumps
)
4615 str_align_jumps
= "8";
4616 if (flag_align_loops
&& !str_align_loops
)
4617 str_align_loops
= "8";
4619 if (rs6000_align_branch_targets
)
4621 if (flag_align_functions
&& !str_align_functions
)
4622 str_align_functions
= "16";
4623 if (flag_align_jumps
&& !str_align_jumps
)
4624 str_align_jumps
= "16";
4625 if (flag_align_loops
&& !str_align_loops
)
4627 can_override_loop_align
= 1;
4628 str_align_loops
= "16";
4633 /* Arrange to save and restore machine status around nested functions. */
4634 init_machine_status
= rs6000_init_machine_status
;
4636 /* We should always be splitting complex arguments, but we can't break
4637 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4638 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4639 targetm
.calls
.split_complex_arg
= NULL
;
4641 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4642 if (DEFAULT_ABI
== ABI_AIX
)
4643 targetm
.calls
.custom_function_descriptors
= 0;
4646 /* Initialize rs6000_cost with the appropriate target costs. */
4648 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4650 switch (rs6000_tune
)
4652 case PROCESSOR_RS64A
:
4653 rs6000_cost
= &rs64a_cost
;
4656 case PROCESSOR_MPCCORE
:
4657 rs6000_cost
= &mpccore_cost
;
4660 case PROCESSOR_PPC403
:
4661 rs6000_cost
= &ppc403_cost
;
4664 case PROCESSOR_PPC405
:
4665 rs6000_cost
= &ppc405_cost
;
4668 case PROCESSOR_PPC440
:
4669 rs6000_cost
= &ppc440_cost
;
4672 case PROCESSOR_PPC476
:
4673 rs6000_cost
= &ppc476_cost
;
4676 case PROCESSOR_PPC601
:
4677 rs6000_cost
= &ppc601_cost
;
4680 case PROCESSOR_PPC603
:
4681 rs6000_cost
= &ppc603_cost
;
4684 case PROCESSOR_PPC604
:
4685 rs6000_cost
= &ppc604_cost
;
4688 case PROCESSOR_PPC604e
:
4689 rs6000_cost
= &ppc604e_cost
;
4692 case PROCESSOR_PPC620
:
4693 rs6000_cost
= &ppc620_cost
;
4696 case PROCESSOR_PPC630
:
4697 rs6000_cost
= &ppc630_cost
;
4700 case PROCESSOR_CELL
:
4701 rs6000_cost
= &ppccell_cost
;
4704 case PROCESSOR_PPC750
:
4705 case PROCESSOR_PPC7400
:
4706 rs6000_cost
= &ppc750_cost
;
4709 case PROCESSOR_PPC7450
:
4710 rs6000_cost
= &ppc7450_cost
;
4713 case PROCESSOR_PPC8540
:
4714 case PROCESSOR_PPC8548
:
4715 rs6000_cost
= &ppc8540_cost
;
4718 case PROCESSOR_PPCE300C2
:
4719 case PROCESSOR_PPCE300C3
:
4720 rs6000_cost
= &ppce300c2c3_cost
;
4723 case PROCESSOR_PPCE500MC
:
4724 rs6000_cost
= &ppce500mc_cost
;
4727 case PROCESSOR_PPCE500MC64
:
4728 rs6000_cost
= &ppce500mc64_cost
;
4731 case PROCESSOR_PPCE5500
:
4732 rs6000_cost
= &ppce5500_cost
;
4735 case PROCESSOR_PPCE6500
:
4736 rs6000_cost
= &ppce6500_cost
;
4739 case PROCESSOR_TITAN
:
4740 rs6000_cost
= &titan_cost
;
4743 case PROCESSOR_POWER4
:
4744 case PROCESSOR_POWER5
:
4745 rs6000_cost
= &power4_cost
;
4748 case PROCESSOR_POWER6
:
4749 rs6000_cost
= &power6_cost
;
4752 case PROCESSOR_POWER7
:
4753 rs6000_cost
= &power7_cost
;
4756 case PROCESSOR_POWER8
:
4757 rs6000_cost
= &power8_cost
;
4760 case PROCESSOR_POWER9
:
4761 case PROCESSOR_POWER10
:
4762 rs6000_cost
= &power9_cost
;
4765 case PROCESSOR_PPCA2
:
4766 rs6000_cost
= &ppca2_cost
;
4775 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4776 param_simultaneous_prefetches
,
4777 rs6000_cost
->simultaneous_prefetches
);
4778 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4779 param_l1_cache_size
,
4780 rs6000_cost
->l1_cache_size
);
4781 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4782 param_l1_cache_line_size
,
4783 rs6000_cost
->cache_line_size
);
4784 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4785 param_l2_cache_size
,
4786 rs6000_cost
->l2_cache_size
);
4788 /* Increase loop peeling limits based on performance analysis. */
4789 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4790 param_max_peeled_insns
, 400);
4791 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4792 param_max_completely_peeled_insns
, 400);
4794 /* The lxvl/stxvl instructions don't perform well before Power10. */
4796 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4797 param_vect_partial_vector_usage
, 1);
4799 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4800 param_vect_partial_vector_usage
, 0);
4802 /* Use the 'model' -fsched-pressure algorithm by default. */
4803 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4804 param_sched_pressure_algorithm
,
4805 SCHED_PRESSURE_MODEL
);
4807 /* If using typedef char *va_list, signal that
4808 __builtin_va_start (&ap, 0) can be optimized to
4809 ap = __builtin_next_arg (0). */
4810 if (DEFAULT_ABI
!= ABI_V4
)
4811 targetm
.expand_builtin_va_start
= NULL
;
4814 rs6000_override_options_after_change ();
4816 /* If not explicitly specified via option, decide whether to generate indexed
4817 load/store instructions. A value of -1 indicates that the
4818 initial value of this variable has not been overwritten. During
4819 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4820 if (TARGET_AVOID_XFORM
== -1)
4821 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4822 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4823 need indexed accesses and the type used is the scalar type of the element
4824 being loaded or stored. */
4825 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4826 && !TARGET_ALTIVEC
);
4828 /* Set the -mrecip options. */
4829 if (rs6000_recip_name
)
4831 char *p
= ASTRDUP (rs6000_recip_name
);
4833 unsigned int mask
, i
;
4836 while ((q
= strtok (p
, ",")) != NULL
)
4847 if (!strcmp (q
, "default"))
4848 mask
= ((TARGET_RECIP_PRECISION
)
4849 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4852 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4853 if (!strcmp (q
, recip_options
[i
].string
))
4855 mask
= recip_options
[i
].mask
;
4859 if (i
== ARRAY_SIZE (recip_options
))
4861 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4869 rs6000_recip_control
&= ~mask
;
4871 rs6000_recip_control
|= mask
;
4875 /* Set the builtin mask of the various options used that could affect which
4876 builtins were used. In the past we used target_flags, but we've run out
4877 of bits, and some options are no longer in target_flags. */
4878 rs6000_builtin_mask
= rs6000_builtin_mask_calculate ();
4879 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
4880 rs6000_print_builtin_options (stderr
, 0, "builtin mask",
4881 rs6000_builtin_mask
);
4883 /* Initialize all of the registers. */
4884 rs6000_init_hard_regno_mode_ok (global_init_p
);
4886 /* Save the initial options in case the user does function specific options */
4888 target_option_default_node
= target_option_current_node
4889 = build_target_option_node (&global_options
, &global_options_set
);
4891 /* If not explicitly specified via option, decide whether to generate the
4892 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4893 if (TARGET_LINK_STACK
== -1)
4894 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4896 /* Deprecate use of -mno-speculate-indirect-jumps. */
4897 if (!rs6000_speculate_indirect_jumps
)
4898 warning (0, "%qs is deprecated and not recommended in any circumstances",
4899 "-mno-speculate-indirect-jumps");
4904 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4905 define the target cpu type. */
4908 rs6000_option_override (void)
4910 (void) rs6000_option_override_internal (true);
4914 /* Implement targetm.vectorize.builtin_mask_for_load. */
4916 rs6000_builtin_mask_for_load (void)
4918 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4919 if ((TARGET_ALTIVEC
&& !TARGET_VSX
)
4920 || (TARGET_VSX
&& !TARGET_EFFICIENT_UNALIGNED_VSX
))
4921 return altivec_builtin_mask_for_load
;
4926 /* Implement LOOP_ALIGN. */
4928 rs6000_loop_align (rtx label
)
4933 /* Don't override loop alignment if -falign-loops was specified. */
4934 if (!can_override_loop_align
)
4937 bb
= BLOCK_FOR_INSN (label
);
4938 ninsns
= num_loop_insns(bb
->loop_father
);
4940 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4941 if (ninsns
> 4 && ninsns
<= 8
4942 && (rs6000_tune
== PROCESSOR_POWER4
4943 || rs6000_tune
== PROCESSOR_POWER5
4944 || rs6000_tune
== PROCESSOR_POWER6
4945 || rs6000_tune
== PROCESSOR_POWER7
4946 || rs6000_tune
== PROCESSOR_POWER8
))
4947 return align_flags (5);
4952 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4953 after applying N number of iterations. This routine does not determine
4954 how may iterations are required to reach desired alignment. */
4957 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
4964 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4967 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
4977 /* Assuming that all other types are naturally aligned. CHECKME! */
4982 /* Return true if the vector misalignment factor is supported by the
4985 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
4992 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4995 /* Return if movmisalign pattern is not supported for this mode. */
4996 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
4999 if (misalignment
== -1)
5001 /* Misalignment factor is unknown at compile time but we know
5002 it's word aligned. */
5003 if (rs6000_vector_alignment_reachable (type
, is_packed
))
5005 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
5007 if (element_size
== 64 || element_size
== 32)
5014 /* VSX supports word-aligned vector. */
5015 if (misalignment
% 4 == 0)
5021 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5023 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5024 tree vectype
, int misalign
)
5029 switch (type_of_cost
)
5037 case cond_branch_not_taken
:
5041 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5045 /* Power7 has only one permute unit, make it a bit expensive. */
5046 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5051 case vec_promote_demote
:
5052 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5053 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5058 case cond_branch_taken
:
5061 case unaligned_load
:
5062 case vector_gather_load
:
5063 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5064 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5067 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5069 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5071 /* Double word aligned. */
5079 /* Double word aligned. */
5083 /* Unknown misalignment. */
5096 /* Misaligned loads are not supported. */
5099 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5102 case unaligned_store
:
5103 case vector_scatter_store
:
5104 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5107 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5109 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5111 /* Double word aligned. */
5119 /* Double word aligned. */
5123 /* Unknown misalignment. */
5136 /* Misaligned stores are not supported. */
5142 /* This is a rough approximation assuming non-constant elements
5143 constructed into a vector via element insertion. FIXME:
5144 vec_construct is not granular enough for uniformly good
5145 decisions. If the initialization is a splat, this is
5146 cheaper than we estimate. Improve this someday. */
5147 elem_type
= TREE_TYPE (vectype
);
5148 /* 32-bit vectors loaded into registers are stored as double
5149 precision, so we need 2 permutes, 2 converts, and 1 merge
5150 to construct a vector of short floats from them. */
5151 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5152 && TYPE_PRECISION (elem_type
) == 32)
5154 /* On POWER9, integer vector types are built up in GPRs and then
5155 use a direct move (2 cycles). For POWER8 this is even worse,
5156 as we need two direct moves and a merge, and the direct moves
5158 else if (INTEGRAL_TYPE_P (elem_type
))
5160 if (TARGET_P9_VECTOR
)
5161 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5163 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5166 /* V2DFmode doesn't need a direct move. */
5174 /* Implement targetm.vectorize.preferred_simd_mode. */
5177 rs6000_preferred_simd_mode (scalar_mode mode
)
5179 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
5181 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5182 return vmode
.require ();
/* Per-loop bookkeeping for the vectorizer cost hooks.  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;	/* Loop being costed; NULL for SLP.  */
  unsigned cost[3];		/* Accumulated prologue/body/epilogue costs,
				   indexed by vect_prologue/vect_body/
				   vect_epilogue.  */
} rs6000_cost_data;
5193 /* Test for likely overcommitment of vector hardware resources. If a
5194 loop iteration is relatively large, and too large a percentage of
5195 instructions in the loop are vectorized, the cost model may not
5196 adequately reflect delays from unavailable vector resources.
5197 Penalize the loop body cost for this case. */
5200 rs6000_density_test (rs6000_cost_data
*data
)
5202 const int DENSITY_PCT_THRESHOLD
= 85;
5203 const int DENSITY_SIZE_THRESHOLD
= 70;
5204 const int DENSITY_PENALTY
= 10;
5205 struct loop
*loop
= data
->loop_info
;
5206 basic_block
*bbs
= get_loop_body (loop
);
5207 int nbbs
= loop
->num_nodes
;
5208 loop_vec_info loop_vinfo
= loop_vec_info_for_loop (data
->loop_info
);
5209 int vec_cost
= data
->cost
[vect_body
], not_vec_cost
= 0;
5212 for (i
= 0; i
< nbbs
; i
++)
5214 basic_block bb
= bbs
[i
];
5215 gimple_stmt_iterator gsi
;
5217 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5219 gimple
*stmt
= gsi_stmt (gsi
);
5220 if (is_gimple_debug (stmt
))
5223 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
5225 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5226 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5232 density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5234 if (density_pct
> DENSITY_PCT_THRESHOLD
5235 && vec_cost
+ not_vec_cost
> DENSITY_SIZE_THRESHOLD
)
5237 data
->cost
[vect_body
] = vec_cost
* (100 + DENSITY_PENALTY
) / 100;
5238 if (dump_enabled_p ())
5239 dump_printf_loc (MSG_NOTE
, vect_location
,
5240 "density %d%%, cost %d exceeds threshold, penalizing "
5241 "loop body cost by %d%%", density_pct
,
5242 vec_cost
+ not_vec_cost
, DENSITY_PENALTY
);
5246 /* Implement targetm.vectorize.init_cost. */
5248 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5249 instruction is needed by the vectorization. */
5250 static bool rs6000_vect_nonmem
;
5253 rs6000_init_cost (struct loop
*loop_info
)
5255 rs6000_cost_data
*data
= XNEW (struct _rs6000_cost_data
);
5256 data
->loop_info
= loop_info
;
5257 data
->cost
[vect_prologue
] = 0;
5258 data
->cost
[vect_body
] = 0;
5259 data
->cost
[vect_epilogue
] = 0;
5260 rs6000_vect_nonmem
= false;
5264 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5265 For some statement, we would like to further fine-grain tweak the cost on
5266 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5267 information on statement operation codes etc. One typical case here is
5268 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5269 for scalar cost, but it should be priced more whatever transformed to either
5270 compare + branch or compare + isel instructions. */
5273 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind
,
5274 struct _stmt_vec_info
*stmt_info
)
5276 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5277 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5279 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5280 if (subcode
== COND_EXPR
)
5287 /* Implement targetm.vectorize.add_stmt_cost. */
5290 rs6000_add_stmt_cost (class vec_info
*vinfo
, void *data
, int count
,
5291 enum vect_cost_for_stmt kind
,
5292 struct _stmt_vec_info
*stmt_info
, tree vectype
,
5293 int misalign
, enum vect_cost_model_location where
)
5295 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
5296 unsigned retval
= 0;
5298 if (flag_vect_cost_model
)
5300 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5302 stmt_cost
+= rs6000_adjust_vect_cost_per_stmt (kind
, stmt_info
);
5303 /* Statements in an inner loop relative to the loop being
5304 vectorized are weighted more heavily. The value here is
5305 arbitrary and could potentially be improved with analysis. */
5306 if (where
== vect_body
&& stmt_info
5307 && stmt_in_inner_loop_p (vinfo
, stmt_info
))
5308 count
*= 50; /* FIXME. */
5310 retval
= (unsigned) (count
* stmt_cost
);
5311 cost_data
->cost
[where
] += retval
;
5313 /* Check whether we're doing something other than just a copy loop.
5314 Not all such loops may be profitably vectorized; see
5315 rs6000_finish_cost. */
5316 if ((kind
== vec_to_scalar
|| kind
== vec_perm
5317 || kind
== vec_promote_demote
|| kind
== vec_construct
5318 || kind
== scalar_to_vec
)
5319 || (where
== vect_body
&& kind
== vector_stmt
))
5320 rs6000_vect_nonmem
= true;
5326 /* For some target specific vectorization cost which can't be handled per stmt,
5327 we check the requisite conditions and adjust the vectorization cost
5328 accordingly if satisfied. One typical example is to model shift cost for
5329 vector with length by counting number of required lengths under condition
5330 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5333 rs6000_adjust_vect_cost_per_loop (rs6000_cost_data
*data
)
5335 struct loop
*loop
= data
->loop_info
;
5337 loop_vec_info loop_vinfo
= loop_vec_info_for_loop (loop
);
5339 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
5341 rgroup_controls
*rgc
;
5342 unsigned int num_vectors_m1
;
5343 unsigned int shift_cnt
= 0;
5344 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo
), num_vectors_m1
, rgc
)
5346 /* Each length needs one shift to fill into bits 0-7. */
5347 shift_cnt
+= num_vectors_m1
+ 1;
5349 rs6000_add_stmt_cost (loop_vinfo
, (void *) data
, shift_cnt
, scalar_stmt
,
5350 NULL
, NULL_TREE
, 0, vect_body
);
5354 /* Implement targetm.vectorize.finish_cost. */
5357 rs6000_finish_cost (void *data
, unsigned *prologue_cost
,
5358 unsigned *body_cost
, unsigned *epilogue_cost
)
5360 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
5362 if (cost_data
->loop_info
)
5364 rs6000_adjust_vect_cost_per_loop (cost_data
);
5365 rs6000_density_test (cost_data
);
5368 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5369 that require versioning for any reason. The vectorization is at
5370 best a wash inside the loop, and the versioning checks make
5371 profitability highly unlikely and potentially quite harmful. */
5372 if (cost_data
->loop_info
)
5374 loop_vec_info vec_info
= loop_vec_info_for_loop (cost_data
->loop_info
);
5375 if (!rs6000_vect_nonmem
5376 && LOOP_VINFO_VECT_FACTOR (vec_info
) == 2
5377 && LOOP_REQUIRES_VERSIONING (vec_info
))
5378 cost_data
->cost
[vect_body
] += 10000;
5381 *prologue_cost
= cost_data
->cost
[vect_prologue
];
5382 *body_cost
= cost_data
->cost
[vect_body
];
5383 *epilogue_cost
= cost_data
->cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.

   Release the rs6000_cost_data allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
5394 /* Implement targetm.loop_unroll_adjust. */
5397 rs6000_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
5399 if (unroll_only_small_loops
)
5401 /* TODO: These are hardcoded values right now. We probably should use
5403 if (loop
->ninsns
<= 6)
5404 return MIN (4, nunroll
);
5405 if (loop
->ninsns
<= 10)
5406 return MIN (2, nunroll
);
5414 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5415 library with vectorized intrinsics. */
5418 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
5422 const char *suffix
= NULL
;
5423 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
5426 machine_mode el_mode
, in_mode
;
5429 /* Libmass is suitable for unsafe math only as it does not correctly support
5430 parts of IEEE with the required precision such as denormals. Only support
5431 it if we have VSX to use the simd d2 or f4 functions.
5432 XXX: Add variable length support. */
5433 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
5436 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5437 n
= TYPE_VECTOR_SUBPARTS (type_out
);
5438 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5439 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5440 if (el_mode
!= in_mode
5476 if (el_mode
== DFmode
&& n
== 2)
5478 bdecl
= mathfn_built_in (double_type_node
, fn
);
5479 suffix
= "d2"; /* pow -> powd2 */
5481 else if (el_mode
== SFmode
&& n
== 4)
5483 bdecl
= mathfn_built_in (float_type_node
, fn
);
5484 suffix
= "4"; /* powf -> powf4 */
5496 gcc_assert (suffix
!= NULL
);
5497 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
5501 strcpy (name
, bname
+ strlen ("__builtin_"));
5502 strcat (name
, suffix
);
5505 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
5506 else if (n_args
== 2)
5507 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
5511 /* Build a function declaration for the vectorized function. */
5512 new_fndecl
= build_decl (BUILTINS_LOCATION
,
5513 FUNCTION_DECL
, get_identifier (name
), fntype
);
5514 TREE_PUBLIC (new_fndecl
) = 1;
5515 DECL_EXTERNAL (new_fndecl
) = 1;
5516 DECL_IS_NOVOPS (new_fndecl
) = 1;
5517 TREE_READONLY (new_fndecl
) = 1;
5522 /* Returns a function decl for a vectorized version of the builtin function
5523 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5524 if it is not available. */
5527 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
5530 machine_mode in_mode
, out_mode
;
5533 if (TARGET_DEBUG_BUILTIN
)
5534 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5535 combined_fn_name (combined_fn (fn
)),
5536 GET_MODE_NAME (TYPE_MODE (type_out
)),
5537 GET_MODE_NAME (TYPE_MODE (type_in
)));
5539 if (TREE_CODE (type_out
) != VECTOR_TYPE
5540 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5543 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5544 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5545 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5546 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5551 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5552 && out_mode
== DFmode
&& out_n
== 2
5553 && in_mode
== DFmode
&& in_n
== 2)
5554 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNDP
];
5555 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5556 && out_mode
== SFmode
&& out_n
== 4
5557 && in_mode
== SFmode
&& in_n
== 4)
5558 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNSP
];
5559 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5560 && out_mode
== SFmode
&& out_n
== 4
5561 && in_mode
== SFmode
&& in_n
== 4)
5562 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_COPYSIGN_V4SF
];
5565 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5566 && out_mode
== DFmode
&& out_n
== 2
5567 && in_mode
== DFmode
&& in_n
== 2)
5568 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIP
];
5569 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5570 && out_mode
== SFmode
&& out_n
== 4
5571 && in_mode
== SFmode
&& in_n
== 4)
5572 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIP
];
5573 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5574 && out_mode
== SFmode
&& out_n
== 4
5575 && in_mode
== SFmode
&& in_n
== 4)
5576 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIP
];
5579 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5580 && out_mode
== DFmode
&& out_n
== 2
5581 && in_mode
== DFmode
&& in_n
== 2)
5582 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIM
];
5583 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5584 && out_mode
== SFmode
&& out_n
== 4
5585 && in_mode
== SFmode
&& in_n
== 4)
5586 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIM
];
5587 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5588 && out_mode
== SFmode
&& out_n
== 4
5589 && in_mode
== SFmode
&& in_n
== 4)
5590 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIM
];
5593 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5594 && out_mode
== DFmode
&& out_n
== 2
5595 && in_mode
== DFmode
&& in_n
== 2)
5596 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDDP
];
5597 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5598 && out_mode
== SFmode
&& out_n
== 4
5599 && in_mode
== SFmode
&& in_n
== 4)
5600 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDSP
];
5601 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5602 && out_mode
== SFmode
&& out_n
== 4
5603 && in_mode
== SFmode
&& in_n
== 4)
5604 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VMADDFP
];
5607 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5608 && out_mode
== DFmode
&& out_n
== 2
5609 && in_mode
== DFmode
&& in_n
== 2)
5610 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIZ
];
5611 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5612 && out_mode
== SFmode
&& out_n
== 4
5613 && in_mode
== SFmode
&& in_n
== 4)
5614 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIZ
];
5615 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5616 && out_mode
== SFmode
&& out_n
== 4
5617 && in_mode
== SFmode
&& in_n
== 4)
5618 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIZ
];
5621 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5622 && flag_unsafe_math_optimizations
5623 && out_mode
== DFmode
&& out_n
== 2
5624 && in_mode
== DFmode
&& in_n
== 2)
5625 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPI
];
5626 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5627 && flag_unsafe_math_optimizations
5628 && out_mode
== SFmode
&& out_n
== 4
5629 && in_mode
== SFmode
&& in_n
== 4)
5630 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPI
];
5633 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5634 && !flag_trapping_math
5635 && out_mode
== DFmode
&& out_n
== 2
5636 && in_mode
== DFmode
&& in_n
== 2)
5637 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIC
];
5638 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5639 && !flag_trapping_math
5640 && out_mode
== SFmode
&& out_n
== 4
5641 && in_mode
== SFmode
&& in_n
== 4)
5642 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIC
];
5648 /* Generate calls to libmass if appropriate. */
5649 if (rs6000_veclib_handler
)
5650 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
5655 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5658 rs6000_builtin_md_vectorized_function (tree fndecl
, tree type_out
,
5661 machine_mode in_mode
, out_mode
;
5664 if (TARGET_DEBUG_BUILTIN
)
5665 fprintf (stderr
, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5666 IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
5667 GET_MODE_NAME (TYPE_MODE (type_out
)),
5668 GET_MODE_NAME (TYPE_MODE (type_in
)));
5670 if (TREE_CODE (type_out
) != VECTOR_TYPE
5671 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5674 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5675 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5676 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5677 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5679 enum rs6000_builtins fn
5680 = (enum rs6000_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
5683 case RS6000_BUILTIN_RSQRTF
:
5684 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
5685 && out_mode
== SFmode
&& out_n
== 4
5686 && in_mode
== SFmode
&& in_n
== 4)
5687 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRSQRTFP
];
5689 case RS6000_BUILTIN_RSQRT
:
5690 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5691 && out_mode
== DFmode
&& out_n
== 2
5692 && in_mode
== DFmode
&& in_n
== 2)
5693 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
5695 case RS6000_BUILTIN_RECIPF
:
5696 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
5697 && out_mode
== SFmode
&& out_n
== 4
5698 && in_mode
== SFmode
&& in_n
== 4)
5699 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRECIPFP
];
5701 case RS6000_BUILTIN_RECIP
:
5702 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5703 && out_mode
== DFmode
&& out_n
== 2
5704 && in_mode
== DFmode
&& in_n
== 2)
5705 return rs6000_builtin_decls
[VSX_BUILTIN_RECIP_V2DF
];
5713 /* Default CPU string for rs6000*_file_start functions. */
5714 static const char *rs6000_default_cpu
;
5716 #ifdef USING_ELFOS_H
5717 const char *rs6000_machine
;
5720 rs6000_machine_from_flags (void)
5722 HOST_WIDE_INT flags
= rs6000_isa_flags
;
5724 /* Disable the flags that should never influence the .machine selection. */
5725 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
);
5727 if ((flags
& (ISA_3_1_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
5729 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
5731 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
5733 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
5735 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
5737 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
5739 if ((flags
& ISA_2_1_MASKS
) != 0)
5741 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
5747 emit_asm_machine (void)
5749 fprintf (asm_out_file
, "\t.machine %s\n", rs6000_machine
);
5753 /* Do anything needed at the start of the asm file. */
5756 rs6000_file_start (void)
5759 const char *start
= buffer
;
5760 FILE *file
= asm_out_file
;
5762 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5764 default_file_start ();
5766 if (flag_verbose_asm
)
5768 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5770 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5772 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5776 if (global_options_set
.x_rs6000_cpu_index
)
5778 fprintf (file
, "%s -mcpu=%s", start
,
5779 processor_target_table
[rs6000_cpu_index
].name
);
5783 if (global_options_set
.x_rs6000_tune_index
)
5785 fprintf (file
, "%s -mtune=%s", start
,
5786 processor_target_table
[rs6000_tune_index
].name
);
5790 if (PPC405_ERRATUM77
)
5792 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5796 #ifdef USING_ELFOS_H
5797 switch (rs6000_sdata
)
5799 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5800 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5801 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5802 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
5805 if (rs6000_sdata
&& g_switch_value
)
5807 fprintf (file
, "%s -G %d", start
,
5817 #ifdef USING_ELFOS_H
5818 rs6000_machine
= rs6000_machine_from_flags ();
5819 emit_asm_machine ();
5822 if (DEFAULT_ABI
== ABI_ELFv2
)
5823 fprintf (file
, "\t.abiversion 2\n");
5827 /* Return nonzero if this function is known to have a null epilogue. */
5830 direct_return (void)
5832 if (reload_completed
)
5834 rs6000_stack_t
*info
= rs6000_stack_info ();
5836 if (info
->first_gp_reg_save
== 32
5837 && info
->first_fp_reg_save
== 64
5838 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
5839 && ! info
->lr_save_p
5840 && ! info
->cr_save_p
5841 && info
->vrsave_size
== 0
5849 /* Helper for num_insns_constant. Calculate number of instructions to
5850 load VALUE to a single gpr using combinations of addi, addis, ori,
5851 oris, sldi and rldimi instructions. */
5854 num_insns_constant_gpr (HOST_WIDE_INT value
)
5856 /* signed constant loadable with addi */
5857 if (SIGNED_INTEGER_16BIT_P (value
))
5860 /* constant loadable with addis */
5861 else if ((value
& 0xffff) == 0
5862 && (value
>> 31 == -1 || value
>> 31 == 0))
5865 /* PADDI can support up to 34 bit signed integers. */
5866 else if (TARGET_PREFIXED
&& SIGNED_INTEGER_34BIT_P (value
))
5869 else if (TARGET_POWERPC64
)
5871 HOST_WIDE_INT low
= ((value
& 0xffffffff) ^ 0x80000000) - 0x80000000;
5872 HOST_WIDE_INT high
= value
>> 31;
5874 if (high
== 0 || high
== -1)
5879 if (low
== 0 || low
== high
)
5880 return num_insns_constant_gpr (high
) + 1;
5882 return num_insns_constant_gpr (low
) + 1;
5884 return (num_insns_constant_gpr (high
)
5885 + num_insns_constant_gpr (low
) + 1);
5892 /* Helper for num_insns_constant. Allow constants formed by the
5893 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5894 and handle modes that require multiple gprs. */
5897 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
5899 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5903 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
5904 int insns
= num_insns_constant_gpr (low
);
5906 /* We won't get more than 2 from num_insns_constant_gpr
5907 except when TARGET_POWERPC64 and mode is DImode or
5908 wider, so the register mode must be DImode. */
5909 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
5912 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5913 it all at once would be UB. */
5914 value
>>= (BITS_PER_WORD
- 1);
5920 /* Return the number of instructions it takes to form a constant in as
5921 many gprs are needed for MODE. */
5924 num_insns_constant (rtx op
, machine_mode mode
)
5928 switch (GET_CODE (op
))
5934 case CONST_WIDE_INT
:
5937 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
5938 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
5945 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
5947 if (mode
== SFmode
|| mode
== SDmode
)
5952 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
5954 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
5955 /* See the first define_split in rs6000.md handling a
5956 const_double_operand. */
5960 else if (mode
== DFmode
|| mode
== DDmode
)
5965 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
5967 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
5969 /* See the second (32-bit) and third (64-bit) define_split
5970 in rs6000.md handling a const_double_operand. */
5971 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
5972 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
5975 else if (mode
== TFmode
|| mode
== TDmode
5976 || mode
== KFmode
|| mode
== IFmode
)
5982 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
5984 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
5986 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
5987 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
5988 insns
= num_insns_constant_multi (val
, DImode
);
5989 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
5990 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
5991 insns
+= num_insns_constant_multi (val
, DImode
);
6003 return num_insns_constant_multi (val
, mode
);
6006 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6007 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6008 corresponding element of the vector, but for V4SFmode, the
6009 corresponding "float" is interpreted as an SImode integer. */
6012 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6016 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6017 gcc_assert (GET_MODE (op
) != V2DImode
6018 && GET_MODE (op
) != V2DFmode
);
6020 tmp
= CONST_VECTOR_ELT (op
, elt
);
6021 if (GET_MODE (op
) == V4SFmode
)
6022 tmp
= gen_lowpart (SImode
, tmp
);
6023 return INTVAL (tmp
);
6026 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6027 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6028 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6029 all items are set to the same value and contain COPIES replicas of the
6030 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6031 operand and the others are set to the value of the operand's msb. */
6034 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6036 machine_mode mode
= GET_MODE (op
);
6037 machine_mode inner
= GET_MODE_INNER (mode
);
6045 HOST_WIDE_INT splat_val
;
6046 HOST_WIDE_INT msb_val
;
6048 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6051 nunits
= GET_MODE_NUNITS (mode
);
6052 bitsize
= GET_MODE_BITSIZE (inner
);
6053 mask
= GET_MODE_MASK (inner
);
6055 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6057 msb_val
= val
>= 0 ? 0 : -1;
6059 /* Construct the value to be splatted, if possible. If not, return 0. */
6060 for (i
= 2; i
<= copies
; i
*= 2)
6062 HOST_WIDE_INT small_val
;
6064 small_val
= splat_val
>> bitsize
;
6066 if (splat_val
!= ((HOST_WIDE_INT
)
6067 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6068 | (small_val
& mask
)))
6070 splat_val
= small_val
;
6073 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6074 if (EASY_VECTOR_15 (splat_val
))
6077 /* Also check if we can splat, and then add the result to itself. Do so if
6078 the value is positive, of if the splat instruction is using OP's mode;
6079 for splat_val < 0, the splat and the add should use the same mode. */
6080 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6081 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6084 /* Also check if are loading up the most significant bit which can be done by
6085 loading up -1 and shifting the value left by -1. */
6086 else if (EASY_VECTOR_MSB (splat_val
, inner
))
6092 /* Check if VAL is present in every STEP-th element, and the
6093 other elements are filled with its most significant bit. */
6094 for (i
= 1; i
< nunits
; ++i
)
6096 HOST_WIDE_INT desired_val
;
6097 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6098 if ((i
& (step
- 1)) == 0)
6101 desired_val
= msb_val
;
6103 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6110 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6111 instruction, filling in the bottom elements with 0 or -1.
6113 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6114 for the number of zeroes to shift in, or negative for the number of 0xff
6117 OP is a CONST_VECTOR. */
6120 vspltis_shifted (rtx op
)
6122 machine_mode mode
= GET_MODE (op
);
6123 machine_mode inner
= GET_MODE_INNER (mode
);
6131 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6134 /* We need to create pseudo registers to do the shift, so don't recognize
6135 shift vector constants after reload. */
6136 if (!can_create_pseudo_p ())
6139 nunits
= GET_MODE_NUNITS (mode
);
6140 mask
= GET_MODE_MASK (inner
);
6142 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6144 /* Check if the value can really be the operand of a vspltis[bhw]. */
6145 if (EASY_VECTOR_15 (val
))
6148 /* Also check if we are loading up the most significant bit which can be done
6149 by loading up -1 and shifting the value left by -1. */
6150 else if (EASY_VECTOR_MSB (val
, inner
))
6156 /* Check if VAL is present in every STEP-th element until we find elements
6157 that are 0 or all 1 bits. */
6158 for (i
= 1; i
< nunits
; ++i
)
6160 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6161 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6163 /* If the value isn't the splat value, check for the remaining elements
6169 for (j
= i
+1; j
< nunits
; ++j
)
6171 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6172 if (const_vector_elt_as_int (op
, elt2
) != 0)
6176 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6179 else if ((elt_val
& mask
) == mask
)
6181 for (j
= i
+1; j
< nunits
; ++j
)
6183 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6184 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6188 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6196 /* If all elements are equal, we don't need to do VLSDOI. */
6201 /* Return true if OP is of the given MODE and can be synthesized
6202 with a vspltisb, vspltish or vspltisw. */
6205 easy_altivec_constant (rtx op
, machine_mode mode
)
6207 unsigned step
, copies
;
6209 if (mode
== VOIDmode
)
6210 mode
= GET_MODE (op
);
6211 else if (mode
!= GET_MODE (op
))
6214 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6216 if (mode
== V2DFmode
)
6217 return zero_constant (op
, mode
);
6219 else if (mode
== V2DImode
)
6221 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6222 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6225 if (zero_constant (op
, mode
))
6228 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6229 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6235 /* V1TImode is a special container for TImode. Ignore for now. */
6236 else if (mode
== V1TImode
)
6239 /* Start with a vspltisw. */
6240 step
= GET_MODE_NUNITS (mode
) / 4;
6243 if (vspltis_constant (op
, step
, copies
))
6246 /* Then try with a vspltish. */
6252 if (vspltis_constant (op
, step
, copies
))
6255 /* And finally a vspltisb. */
6261 if (vspltis_constant (op
, step
, copies
))
6264 if (vspltis_shifted (op
) != 0)
6270 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6271 result is OP. Abort if it is not possible. */
6274 gen_easy_altivec_constant (rtx op
)
6276 machine_mode mode
= GET_MODE (op
);
6277 int nunits
= GET_MODE_NUNITS (mode
);
6278 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6279 unsigned step
= nunits
/ 4;
6280 unsigned copies
= 1;
6282 /* Start with a vspltisw. */
6283 if (vspltis_constant (op
, step
, copies
))
6284 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6286 /* Then try with a vspltish. */
6292 if (vspltis_constant (op
, step
, copies
))
6293 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6295 /* And finally a vspltisb. */
6301 if (vspltis_constant (op
, step
, copies
))
6302 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6307 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6308 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6310 Return the number of instructions needed (1 or 2) into the address pointed
6313 Return the constant that is being split via CONSTANT_PTR. */
6316 xxspltib_constant_p (rtx op
,
6321 size_t nunits
= GET_MODE_NUNITS (mode
);
6323 HOST_WIDE_INT value
;
6326 /* Set the returned values to out of bound values. */
6327 *num_insns_ptr
= -1;
6328 *constant_ptr
= 256;
6330 if (!TARGET_P9_VECTOR
)
6333 if (mode
== VOIDmode
)
6334 mode
= GET_MODE (op
);
6336 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6339 /* Handle (vec_duplicate <constant>). */
6340 if (GET_CODE (op
) == VEC_DUPLICATE
)
6342 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6343 && mode
!= V2DImode
)
6346 element
= XEXP (op
, 0);
6347 if (!CONST_INT_P (element
))
6350 value
= INTVAL (element
);
6351 if (!IN_RANGE (value
, -128, 127))
6355 /* Handle (const_vector [...]). */
6356 else if (GET_CODE (op
) == CONST_VECTOR
)
6358 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6359 && mode
!= V2DImode
)
6362 element
= CONST_VECTOR_ELT (op
, 0);
6363 if (!CONST_INT_P (element
))
6366 value
= INTVAL (element
);
6367 if (!IN_RANGE (value
, -128, 127))
6370 for (i
= 1; i
< nunits
; i
++)
6372 element
= CONST_VECTOR_ELT (op
, i
);
6373 if (!CONST_INT_P (element
))
6376 if (value
!= INTVAL (element
))
6381 /* Handle integer constants being loaded into the upper part of the VSX
6382 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6383 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6384 else if (CONST_INT_P (op
))
6386 if (!SCALAR_INT_MODE_P (mode
))
6389 value
= INTVAL (op
);
6390 if (!IN_RANGE (value
, -128, 127))
6393 if (!IN_RANGE (value
, -1, 0))
6395 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6398 if (EASY_VECTOR_15 (value
))
6406 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6407 sign extend. Special case 0/-1 to allow getting any VSX register instead
6408 of an Altivec register. */
6409 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6410 && EASY_VECTOR_15 (value
))
6413 /* Return # of instructions and the constant byte for XXSPLTIB. */
6414 if (mode
== V16QImode
)
6417 else if (IN_RANGE (value
, -1, 0))
6423 *constant_ptr
= (int) value
;
6428 output_vec_const_move (rtx
*operands
)
6436 mode
= GET_MODE (dest
);
6440 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6441 int xxspltib_value
= 256;
6444 if (zero_constant (vec
, mode
))
6446 if (TARGET_P9_VECTOR
)
6447 return "xxspltib %x0,0";
6449 else if (dest_vmx_p
)
6450 return "vspltisw %0,0";
6453 return "xxlxor %x0,%x0,%x0";
6456 if (all_ones_constant (vec
, mode
))
6458 if (TARGET_P9_VECTOR
)
6459 return "xxspltib %x0,255";
6461 else if (dest_vmx_p
)
6462 return "vspltisw %0,-1";
6464 else if (TARGET_P8_VECTOR
)
6465 return "xxlorc %x0,%x0,%x0";
6471 if (TARGET_P9_VECTOR
6472 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6476 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6477 return "xxspltib %x0,%2";
6488 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6489 if (zero_constant (vec
, mode
))
6490 return "vspltisw %0,0";
6492 if (all_ones_constant (vec
, mode
))
6493 return "vspltisw %0,-1";
6495 /* Do we need to construct a value using VSLDOI? */
6496 shift
= vspltis_shifted (vec
);
6500 splat_vec
= gen_easy_altivec_constant (vec
);
6501 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6502 operands
[1] = XEXP (splat_vec
, 0);
6503 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6506 switch (GET_MODE (splat_vec
))
6509 return "vspltisw %0,%1";
6512 return "vspltish %0,%1";
6515 return "vspltisb %0,%1";
6525 /* Initialize vector TARGET to VALS. */
6528 rs6000_expand_vector_init (rtx target
, rtx vals
)
6530 machine_mode mode
= GET_MODE (target
);
6531 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6532 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6533 int n_var
= 0, one_var
= -1;
6534 bool all_same
= true, all_const_zero
= true;
6538 for (i
= 0; i
< n_elts
; ++i
)
6540 x
= XVECEXP (vals
, 0, i
);
6541 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6542 ++n_var
, one_var
= i
;
6543 else if (x
!= CONST0_RTX (inner_mode
))
6544 all_const_zero
= false;
6546 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6552 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6553 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6554 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6556 /* Zero register. */
6557 emit_move_insn (target
, CONST0_RTX (mode
));
6560 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6562 /* Splat immediate. */
6563 emit_insn (gen_rtx_SET (target
, const_vec
));
6568 /* Load from constant pool. */
6569 emit_move_insn (target
, const_vec
);
6574 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6575 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6579 size_t num_elements
= all_same
? 1 : 2;
6580 for (i
= 0; i
< num_elements
; i
++)
6582 op
[i
] = XVECEXP (vals
, 0, i
);
6583 /* Just in case there is a SUBREG with a smaller mode, do a
6585 if (GET_MODE (op
[i
]) != inner_mode
)
6587 rtx tmp
= gen_reg_rtx (inner_mode
);
6588 convert_move (tmp
, op
[i
], 0);
6591 /* Allow load with splat double word. */
6592 else if (MEM_P (op
[i
]))
6595 op
[i
] = force_reg (inner_mode
, op
[i
]);
6597 else if (!REG_P (op
[i
]))
6598 op
[i
] = force_reg (inner_mode
, op
[i
]);
6603 if (mode
== V2DFmode
)
6604 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6606 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6610 if (mode
== V2DFmode
)
6611 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6613 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6618 /* Special case initializing vector int if we are on 64-bit systems with
6619 direct move or we have the ISA 3.0 instructions. */
6620 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6621 && TARGET_DIRECT_MOVE_64BIT
)
6625 rtx element0
= XVECEXP (vals
, 0, 0);
6626 if (MEM_P (element0
))
6627 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6629 element0
= force_reg (SImode
, element0
);
6631 if (TARGET_P9_VECTOR
)
6632 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6635 rtx tmp
= gen_reg_rtx (DImode
);
6636 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6637 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6646 for (i
= 0; i
< 4; i
++)
6647 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6649 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6650 elements
[2], elements
[3]));
6655 /* With single precision floating point on VSX, know that internally single
6656 precision is actually represented as a double, and either make 2 V2DF
6657 vectors, and convert these vectors to single precision, or do one
6658 conversion, and splat the result to the other elements. */
6659 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6663 rtx element0
= XVECEXP (vals
, 0, 0);
6665 if (TARGET_P9_VECTOR
)
6667 if (MEM_P (element0
))
6668 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6670 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6675 rtx freg
= gen_reg_rtx (V4SFmode
);
6676 rtx sreg
= force_reg (SFmode
, element0
);
6677 rtx cvt
= (TARGET_XSCVDPSPN
6678 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6679 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6682 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6688 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6694 for (i
= 0; i
< 4; i
++)
6696 tmp_si
[i
] = gen_reg_rtx (SImode
);
6697 tmp_di
[i
] = gen_reg_rtx (DImode
);
6698 mrg_di
[i
] = gen_reg_rtx (DImode
);
6699 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6700 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6701 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6704 if (!BYTES_BIG_ENDIAN
)
6706 std::swap (tmp_di
[0], tmp_di
[1]);
6707 std::swap (tmp_di
[2], tmp_di
[3]);
6710 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6711 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6712 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6713 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6715 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6716 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6717 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6721 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6722 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6723 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6724 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6725 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6726 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6727 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
6728 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
6730 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
6731 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
6732 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6733 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6734 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
6740 /* Special case initializing vector short/char that are splats if we are on
6741 64-bit systems with direct move. */
6742 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
6743 && (mode
== V16QImode
|| mode
== V8HImode
))
6745 rtx op0
= XVECEXP (vals
, 0, 0);
6746 rtx di_tmp
= gen_reg_rtx (DImode
);
6749 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
6751 if (mode
== V16QImode
)
6753 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
6754 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
6758 if (mode
== V8HImode
)
6760 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
6761 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
6766 /* Store value to stack temp. Load vector element. Splat. However, splat
6767 of 64-bit items is not supported on Altivec. */
6768 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
6770 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
6771 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
6772 XVECEXP (vals
, 0, 0));
6773 x
= gen_rtx_UNSPEC (VOIDmode
,
6774 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
6775 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6777 gen_rtx_SET (target
, mem
),
6779 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
6780 gen_rtx_PARALLEL (VOIDmode
,
6781 gen_rtvec (1, const0_rtx
)));
6782 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
6786 /* One field is non-constant. Load constant then overwrite
6790 rtx copy
= copy_rtx (vals
);
6792 /* Load constant part of vector, substitute neighboring value for
6794 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
6795 rs6000_expand_vector_init (target
, copy
);
6797 /* Insert variable. */
6798 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
6803 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
6806 /* Force the values into word_mode registers. */
6807 for (i
= 0; i
< n_elts
; i
++)
6809 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
6810 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
6811 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
6814 /* Take unsigned char big endianness on 64bit as example for below
6815 construction, the input values are: A, B, C, D, ..., O, P. */
6817 if (TARGET_DIRECT_MOVE_128
)
6819 /* Move to VSX register with vec_concat, each has 2 values.
6820 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6821 vr1[1] = { xxxxxxxC, xxxxxxxD };
6823 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6825 for (i
= 0; i
< n_elts
/ 2; i
++)
6827 vr1
[i
] = gen_reg_rtx (V2DImode
);
6828 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
6832 /* Pack vectors with 2 values into vectors with 4 values.
6833 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6834 vr2[1] = { xxxExxxF, xxxGxxxH };
6835 vr2[1] = { xxxIxxxJ, xxxKxxxL };
6836 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6838 for (i
= 0; i
< n_elts
/ 4; i
++)
6840 vr2
[i
] = gen_reg_rtx (V4SImode
);
6841 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
6845 /* Pack vectors with 4 values into vectors with 8 values.
6846 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6847 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6849 for (i
= 0; i
< n_elts
/ 8; i
++)
6851 vr3
[i
] = gen_reg_rtx (V8HImode
);
6852 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
6856 /* If it's V8HImode, it's done and return it. */
6857 if (mode
== V8HImode
)
6859 emit_insn (gen_rtx_SET (target
, vr3
[0]));
6863 /* Pack vectors with 8 values into 16 values. */
6864 rtx res
= gen_reg_rtx (V16QImode
);
6865 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
6866 emit_insn (gen_rtx_SET (target
, res
));
6870 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
6871 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
6872 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
6875 /* Set up some common gen routines and values. */
6876 if (BYTES_BIG_ENDIAN
)
6878 if (mode
== V16QImode
)
6880 merge_v16qi
= gen_altivec_vmrghb
;
6881 merge_v8hi
= gen_altivec_vmrglh
;
6884 merge_v8hi
= gen_altivec_vmrghh
;
6886 merge_v4si
= gen_altivec_vmrglw
;
6887 perm_idx
= GEN_INT (3);
6891 if (mode
== V16QImode
)
6893 merge_v16qi
= gen_altivec_vmrglb
;
6894 merge_v8hi
= gen_altivec_vmrghh
;
6897 merge_v8hi
= gen_altivec_vmrglh
;
6899 merge_v4si
= gen_altivec_vmrghw
;
6900 perm_idx
= GEN_INT (0);
6903 /* Move to VSX register with direct move.
6904 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
6905 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
6907 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
6909 for (i
= 0; i
< n_elts
; i
++)
6911 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
6912 if (TARGET_POWERPC64
)
6913 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
6915 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
6918 /* Merge/move to vector short.
6919 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
6920 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
6922 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
6924 for (i
= 0; i
< 8; i
++)
6927 if (mode
== V16QImode
)
6929 tmp
= gen_reg_rtx (V16QImode
);
6930 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
6932 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
6933 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
6936 /* Merge vector short to vector int.
6937 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
6938 vr_si[1] = { xxxxxxxx, xxxxEFGH };
6940 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
6942 for (i
= 0; i
< 4; i
++)
6944 rtx tmp
= gen_reg_rtx (V8HImode
);
6945 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
6946 vr_si
[i
] = gen_reg_rtx (V4SImode
);
6947 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
6950 /* Merge vector int to vector long.
6951 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
6952 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
6954 for (i
= 0; i
< 2; i
++)
6956 rtx tmp
= gen_reg_rtx (V4SImode
);
6957 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
6958 vr_di
[i
] = gen_reg_rtx (V2DImode
);
6959 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
6962 rtx res
= gen_reg_rtx (V2DImode
);
6963 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
6964 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
6970 /* Construct the vector in memory one field at a time
6971 and load the whole vector. */
6972 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
6973 for (i
= 0; i
< n_elts
; i
++)
6974 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
6975 i
* GET_MODE_SIZE (inner_mode
)),
6976 XVECEXP (vals
, 0, i
));
6977 emit_move_insn (target
, mem
);
6980 /* Set field ELT_RTX of TARGET to VAL. */
6983 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
6985 machine_mode mode
= GET_MODE (target
);
6986 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6987 rtx reg
= gen_reg_rtx (mode
);
6989 int width
= GET_MODE_SIZE (inner_mode
);
6992 val
= force_reg (GET_MODE (val
), val
);
6994 if (VECTOR_MEM_VSX_P (mode
))
6996 rtx insn
= NULL_RTX
;
6998 if (mode
== V2DFmode
)
6999 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7001 else if (mode
== V2DImode
)
7002 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7004 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7006 if (mode
== V4SImode
)
7007 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7008 else if (mode
== V8HImode
)
7009 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7010 else if (mode
== V16QImode
)
7011 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7012 else if (mode
== V4SFmode
)
7013 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7023 gcc_assert (CONST_INT_P (elt_rtx
));
7025 /* Simplify setting single element vectors like V1TImode. */
7026 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7027 && INTVAL (elt_rtx
) == 0)
7029 emit_move_insn (target
, gen_lowpart (mode
, val
));
7033 /* Load single variable value. */
7034 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7035 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7036 x
= gen_rtx_UNSPEC (VOIDmode
,
7037 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7038 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7040 gen_rtx_SET (reg
, mem
),
7043 /* Linear sequence. */
7044 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7045 for (i
= 0; i
< 16; ++i
)
7046 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7048 /* Set permute mask to insert element into target. */
7049 for (i
= 0; i
< width
; ++i
)
7050 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7051 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7053 if (BYTES_BIG_ENDIAN
)
7054 x
= gen_rtx_UNSPEC (mode
,
7055 gen_rtvec (3, target
, reg
,
7056 force_reg (V16QImode
, x
)),
7060 if (TARGET_P9_VECTOR
)
7061 x
= gen_rtx_UNSPEC (mode
,
7062 gen_rtvec (3, reg
, target
,
7063 force_reg (V16QImode
, x
)),
7067 /* Invert selector. We prefer to generate VNAND on P8 so
7068 that future fusion opportunities can kick in, but must
7069 generate VNOR elsewhere. */
7070 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7071 rtx iorx
= (TARGET_P8_VECTOR
7072 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7073 : gen_rtx_AND (V16QImode
, notx
, notx
));
7074 rtx tmp
= gen_reg_rtx (V16QImode
);
7075 emit_insn (gen_rtx_SET (tmp
, iorx
));
7077 /* Permute with operands reversed and adjusted selector. */
7078 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7083 emit_insn (gen_rtx_SET (target
, x
));
7086 /* Extract field ELT from VEC into TARGET. */
7089 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7091 machine_mode mode
= GET_MODE (vec
);
7092 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7095 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7102 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7105 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7108 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7111 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7114 if (TARGET_DIRECT_MOVE_64BIT
)
7116 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7122 if (TARGET_DIRECT_MOVE_64BIT
)
7124 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7130 if (TARGET_DIRECT_MOVE_64BIT
)
7132 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7138 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7139 && TARGET_DIRECT_MOVE_64BIT
)
7141 if (GET_MODE (elt
) != DImode
)
7143 rtx tmp
= gen_reg_rtx (DImode
);
7144 convert_move (tmp
, elt
, 0);
7147 else if (!REG_P (elt
))
7148 elt
= force_reg (DImode
, elt
);
7153 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7157 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7161 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7165 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7169 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7173 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7177 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7185 /* Allocate mode-sized buffer. */
7186 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7188 emit_move_insn (mem
, vec
);
7189 if (CONST_INT_P (elt
))
7191 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7193 /* Add offset to field within buffer matching vector element. */
7194 mem
= adjust_address_nv (mem
, inner_mode
,
7195 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7196 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7200 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7201 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7202 rtx new_addr
= gen_reg_rtx (Pmode
);
7204 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7206 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7207 new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7208 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7209 emit_move_insn (target
, new_addr
);
7213 /* Return the offset within a memory object (MEM) of a vector type to a given
7214 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7215 the element is constant, we return a constant integer.
7217 Otherwise, we use a base register temporary to calculate the offset after
7218 masking it to fit within the bounds of the vector and scaling it. The
7219 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7220 built-in function. */
7223 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7225 if (CONST_INT_P (element
))
7226 return GEN_INT (INTVAL (element
) * scalar_size
);
7228 /* All insns should use the 'Q' constraint (address is a single register) if
7229 the element number is not a constant. */
7230 gcc_assert (satisfies_constraint_Q (mem
));
7232 /* Mask the element to make sure the element number is between 0 and the
7233 maximum number of elements - 1 so that we don't generate an address
7234 outside the vector. */
7235 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7236 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7237 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7239 /* Shift the element to get the byte offset from the element number. */
7240 int shift
= exact_log2 (scalar_size
);
7241 gcc_assert (shift
>= 0);
7245 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7246 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
7252 /* Helper function update PC-relative addresses when we are adjusting a memory
7253 address (ADDR) to a vector to point to a scalar field within the vector with
7254 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7255 use the base register temporary (BASE_TMP) to form the address. */
7258 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7260 rtx new_addr
= NULL
;
7262 gcc_assert (CONST_INT_P (element_offset
));
7264 if (GET_CODE (addr
) == CONST
)
7265 addr
= XEXP (addr
, 0);
7267 if (GET_CODE (addr
) == PLUS
)
7269 rtx op0
= XEXP (addr
, 0);
7270 rtx op1
= XEXP (addr
, 1);
7272 if (CONST_INT_P (op1
))
7274 HOST_WIDE_INT offset
7275 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7282 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7283 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7289 emit_move_insn (base_tmp
, addr
);
7290 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7294 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7296 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7297 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7306 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7307 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7308 temporary (BASE_TMP) to fixup the address. Return the new memory address
7309 that is valid for reads or writes to a given register (SCALAR_REG).
7311 This function is expected to be called after reload is completed when we are
7312 splitting insns. The temporary BASE_TMP might be set multiple times with
7316 rs6000_adjust_vec_address (rtx scalar_reg
,
7320 machine_mode scalar_mode
)
7322 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7323 rtx addr
= XEXP (mem
, 0);
7326 gcc_assert (!reg_mentioned_p (base_tmp
, addr
));
7327 gcc_assert (!reg_mentioned_p (base_tmp
, element
));
7329 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7330 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7332 /* Calculate what we need to add to the address to get the element
7334 rtx element_offset
= get_vector_offset (mem
, element
, base_tmp
, scalar_size
);
7336 /* Create the new address pointing to the element within the vector. If we
7337 are adding 0, we don't have to change the address. */
7338 if (element_offset
== const0_rtx
)
7341 /* A simple indirect address can be converted into a reg + offset
7343 else if (REG_P (addr
) || SUBREG_P (addr
))
7344 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7346 /* For references to local static variables, fold a constant offset into the
7348 else if (pcrel_local_address (addr
, Pmode
) && CONST_INT_P (element_offset
))
7349 new_addr
= adjust_vec_address_pcrel (addr
, element_offset
, base_tmp
);
7351 /* Optimize D-FORM addresses with constant offset with a constant element, to
7352 include the element offset in the address directly. */
7353 else if (GET_CODE (addr
) == PLUS
)
7355 rtx op0
= XEXP (addr
, 0);
7356 rtx op1
= XEXP (addr
, 1);
7358 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7359 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7361 /* op0 should never be r0, because r0+offset is not valid. But it
7362 doesn't hurt to make sure it is not r0. */
7363 gcc_assert (reg_or_subregno (op0
) != 0);
7365 /* D-FORM address with constant element number. */
7366 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7367 rtx offset_rtx
= GEN_INT (offset
);
7368 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7372 /* If we don't have a D-FORM address with a constant element number,
7373 add the two elements in the current address. Then add the offset.
7375 Previously, we tried to add the offset to OP1 and change the
7376 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7377 complicated because we had to verify that op1 was not GPR0 and we
7378 had a constant element offset (due to the way ADDI is defined).
7379 By doing the add of OP0 and OP1 first, and then adding in the
7380 offset, it has the benefit that if D-FORM instructions are
7381 allowed, the offset is part of the memory access to the vector
7383 emit_insn (gen_rtx_SET (base_tmp
, gen_rtx_PLUS (Pmode
, op0
, op1
)));
7384 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7390 emit_move_insn (base_tmp
, addr
);
7391 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7394 /* If the address isn't valid, move the address into the temporary base
7395 register. Some reasons it could not be valid include:
7397 The address offset overflowed the 16 or 34 bit offset size;
7398 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7399 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7400 Only X_FORM loads can be done, and the address is D_FORM. */
7402 enum insn_form iform
7403 = address_to_insn_form (new_addr
, scalar_mode
,
7404 reg_to_non_prefixed (scalar_reg
, scalar_mode
));
7406 if (iform
== INSN_FORM_BAD
)
7408 emit_move_insn (base_tmp
, new_addr
);
7409 new_addr
= base_tmp
;
7412 return change_address (mem
, scalar_mode
, new_addr
);
7415 /* Split a variable vec_extract operation into the component instructions. */
7418 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7421 machine_mode mode
= GET_MODE (src
);
7422 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
7423 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7424 int byte_shift
= exact_log2 (scalar_size
);
7426 gcc_assert (byte_shift
>= 0);
7428 /* If we are given a memory address, optimize to load just the element. We
7429 don't have to adjust the vector element number on little endian
7433 emit_move_insn (dest
,
7434 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
7439 else if (REG_P (src
) || SUBREG_P (src
))
7441 int num_elements
= GET_MODE_NUNITS (mode
);
7442 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
7443 int bit_shift
= 7 - exact_log2 (num_elements
);
7445 unsigned int dest_regno
= reg_or_subregno (dest
);
7446 unsigned int src_regno
= reg_or_subregno (src
);
7447 unsigned int element_regno
= reg_or_subregno (element
);
7449 gcc_assert (REG_P (tmp_gpr
));
7451 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7452 a general purpose register. */
7453 if (TARGET_P9_VECTOR
7454 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7455 && INT_REGNO_P (dest_regno
)
7456 && ALTIVEC_REGNO_P (src_regno
)
7457 && INT_REGNO_P (element_regno
))
7459 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7460 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7462 if (mode
== V16QImode
)
7463 emit_insn (BYTES_BIG_ENDIAN
7464 ? gen_vextublx (dest_si
, element_si
, src
)
7465 : gen_vextubrx (dest_si
, element_si
, src
));
7467 else if (mode
== V8HImode
)
7469 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7470 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7471 emit_insn (BYTES_BIG_ENDIAN
7472 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7473 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7479 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7480 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
7481 emit_insn (BYTES_BIG_ENDIAN
7482 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
7483 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
7490 gcc_assert (REG_P (tmp_altivec
));
7492 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7493 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7494 will shift the element into the upper position (adding 3 to convert a
7495 byte shift into a bit shift). */
7496 if (scalar_size
== 8)
7498 if (!BYTES_BIG_ENDIAN
)
7500 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
7506 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7508 emit_insn (gen_rtx_SET (tmp_gpr
,
7509 gen_rtx_AND (DImode
,
7510 gen_rtx_ASHIFT (DImode
,
7517 if (!BYTES_BIG_ENDIAN
)
7519 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
7521 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
7522 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
7528 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
7531 /* Get the value into the lower byte of the Altivec register where VSLO
7533 if (TARGET_P9_VECTOR
)
7534 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
7535 else if (can_create_pseudo_p ())
7536 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
7539 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7540 emit_move_insn (tmp_di
, tmp_gpr
);
7541 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
7544 /* Do the VSLO to get the value into the final location. */
7548 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
7552 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
7557 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7558 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
7559 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7560 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7563 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
7571 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7572 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7573 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
7574 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7576 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
7577 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
7578 GEN_INT (64 - bits_in_element
)));
7592 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7593 selects whether the alignment is abi mandated, optional, or
7594 both abi and optional alignment. */
7597 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
7599 if (how
!= align_opt
)
7601 if (TREE_CODE (type
) == VECTOR_TYPE
&& align
< 128)
7605 if (how
!= align_abi
)
7607 if (TREE_CODE (type
) == ARRAY_TYPE
7608 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
7610 if (align
< BITS_PER_WORD
)
7611 align
= BITS_PER_WORD
;
7618 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7619 instructions simply ignore the low bits; VSX memory instructions
7620 are aligned to 4 or 8 bytes. */
7623 rs6000_slow_unaligned_access (machine_mode mode
, unsigned int align
)
7625 return (STRICT_ALIGNMENT
7626 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7627 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && align
< 32)
7628 || ((VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
))
7629 && (int) align
< VECTOR_ALIGN (mode
)))));
7632 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7635 rs6000_special_adjust_field_align_p (tree type
, unsigned int computed
)
7637 if (TARGET_ALTIVEC
&& TREE_CODE (type
) == VECTOR_TYPE
)
7639 if (computed
!= 128)
7642 if (!warned
&& warn_psabi
)
7645 inform (input_location
,
7646 "the layout of aggregates containing vectors with"
7647 " %d-byte alignment has changed in GCC 5",
7648 computed
/ BITS_PER_UNIT
);
7651 /* In current GCC there is no special case. */
7658 /* AIX increases natural record alignment to doubleword if the first
7659 field is an FP double while the FP fields remain word aligned. */
7662 rs6000_special_round_type_align (tree type
, unsigned int computed
,
7663 unsigned int specified
)
7665 unsigned int align
= MAX (computed
, specified
);
7666 tree field
= TYPE_FIELDS (type
);
7668 /* Skip all non field decls */
7669 while (field
!= NULL
7670 && (TREE_CODE (field
) != FIELD_DECL
7671 || DECL_FIELD_ABI_IGNORED (field
)))
7672 field
= DECL_CHAIN (field
);
7674 if (field
!= NULL
&& field
!= type
)
7676 type
= TREE_TYPE (field
);
7677 while (TREE_CODE (type
) == ARRAY_TYPE
)
7678 type
= TREE_TYPE (type
);
7680 if (type
!= error_mark_node
&& TYPE_MODE (type
) == DFmode
)
7681 align
= MAX (align
, 64);
7687 /* Darwin increases record alignment to the natural alignment of
7691 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
7692 unsigned int specified
)
7694 unsigned int align
= MAX (computed
, specified
);
7696 if (TYPE_PACKED (type
))
7699 /* Find the first field, looking down into aggregates. */
7701 tree field
= TYPE_FIELDS (type
);
7702 /* Skip all non field decls */
7703 while (field
!= NULL
7704 && (TREE_CODE (field
) != FIELD_DECL
7705 || DECL_FIELD_ABI_IGNORED (field
)))
7706 field
= DECL_CHAIN (field
);
7709 /* A packed field does not contribute any extra alignment. */
7710 if (DECL_PACKED (field
))
7712 type
= TREE_TYPE (field
);
7713 while (TREE_CODE (type
) == ARRAY_TYPE
)
7714 type
= TREE_TYPE (type
);
7715 } while (AGGREGATE_TYPE_P (type
));
7717 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
7718 align
= MAX (align
, TYPE_ALIGN (type
));
7723 /* Return 1 for an operand in small memory on V.4/eabi. */
7726 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
7727 machine_mode mode ATTRIBUTE_UNUSED
)
7732 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
7735 if (DEFAULT_ABI
!= ABI_V4
)
7738 if (SYMBOL_REF_P (op
))
7741 else if (GET_CODE (op
) != CONST
7742 || GET_CODE (XEXP (op
, 0)) != PLUS
7743 || !SYMBOL_REF_P (XEXP (XEXP (op
, 0), 0))
7744 || !CONST_INT_P (XEXP (XEXP (op
, 0), 1)))
7749 rtx sum
= XEXP (op
, 0);
7750 HOST_WIDE_INT summand
;
7752 /* We have to be careful here, because it is the referenced address
7753 that must be 32k from _SDA_BASE_, not just the symbol. */
7754 summand
= INTVAL (XEXP (sum
, 1));
7755 if (summand
< 0 || summand
> g_switch_value
)
7758 sym_ref
= XEXP (sum
, 0);
7761 return SYMBOL_REF_SMALL_P (sym_ref
);
7767 /* Return true if either operand is a general purpose register. */
7770 gpr_or_gpr_p (rtx op0
, rtx op1
)
7772 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
7773 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
7776 /* Return true if this is a move direct operation between GPR registers and
7777 floating point/VSX registers. */
7780 direct_move_p (rtx op0
, rtx op1
)
7782 if (!REG_P (op0
) || !REG_P (op1
))
7785 if (!TARGET_DIRECT_MOVE
)
7788 int regno0
= REGNO (op0
);
7789 int regno1
= REGNO (op1
);
7790 if (!HARD_REGISTER_NUM_P (regno0
) || !HARD_REGISTER_NUM_P (regno1
))
7793 if (INT_REGNO_P (regno0
) && VSX_REGNO_P (regno1
))
7796 if (VSX_REGNO_P (regno0
) && INT_REGNO_P (regno1
))
7802 /* Return true if the ADDR is an acceptable address for a quad memory
7803 operation of mode MODE (either LQ/STQ for general purpose registers, or
7804 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7805 is intended for LQ/STQ. If it is false, the address is intended for the ISA
7806 3.0 LXV/STXV instruction. */
7809 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
7813 if (GET_MODE_SIZE (mode
) < 16)
7816 if (legitimate_indirect_address_p (addr
, strict
))
7819 if (VECTOR_MODE_P (mode
) && !mode_supports_dq_form (mode
))
7822 /* Is this a valid prefixed address? If the bottom four bits of the offset
7823 are non-zero, we could use a prefixed instruction (which does not have the
7824 DQ-form constraint that the traditional instruction had) instead of
7825 forcing the unaligned offset to a GPR. */
7826 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DQ
))
7829 if (GET_CODE (addr
) != PLUS
)
7832 op0
= XEXP (addr
, 0);
7833 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
7836 op1
= XEXP (addr
, 1);
7837 if (!CONST_INT_P (op1
))
7840 return quad_address_offset_p (INTVAL (op1
));
7843 /* Return true if this is a load or store quad operation. This function does
7844 not handle the atomic quad memory instructions. */
7847 quad_load_store_p (rtx op0
, rtx op1
)
7851 if (!TARGET_QUAD_MEMORY
)
7854 else if (REG_P (op0
) && MEM_P (op1
))
7855 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
7856 && quad_memory_operand (op1
, GET_MODE (op1
))
7857 && !reg_overlap_mentioned_p (op0
, op1
));
7859 else if (MEM_P (op0
) && REG_P (op1
))
7860 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
7861 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
7866 if (TARGET_DEBUG_ADDR
)
7868 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
7869 ret
? "true" : "false");
7870 debug_rtx (gen_rtx_SET (op0
, op1
));
7876 /* Given an address, return a constant offset term if one exists. */
7879 address_offset (rtx op
)
7881 if (GET_CODE (op
) == PRE_INC
7882 || GET_CODE (op
) == PRE_DEC
)
7884 else if (GET_CODE (op
) == PRE_MODIFY
7885 || GET_CODE (op
) == LO_SUM
)
7888 if (GET_CODE (op
) == CONST
)
7891 if (GET_CODE (op
) == PLUS
)
7894 if (CONST_INT_P (op
))
7900 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7901 the mode. If we can't find (or don't know) the alignment of the symbol
7902 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7903 should be pessimistic]. Offsets are validated in the same way as for
7906 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
7908 /* We should not get here with this. */
7909 gcc_checking_assert (! mode_supports_dq_form (mode
));
7911 if (GET_CODE (x
) == CONST
)
7914 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
7915 x
= XVECEXP (x
, 0, 0);
7918 unsigned HOST_WIDE_INT offset
= 0;
7920 if (GET_CODE (x
) == PLUS
)
7923 if (! SYMBOL_REF_P (sym
))
7925 if (!CONST_INT_P (XEXP (x
, 1)))
7927 offset
= INTVAL (XEXP (x
, 1));
7929 else if (SYMBOL_REF_P (x
))
7931 else if (CONST_INT_P (x
))
7932 offset
= INTVAL (x
);
7933 else if (GET_CODE (x
) == LABEL_REF
)
7934 offset
= 0; // We assume code labels are Pmode aligned
7936 return false; // not sure what we have here.
7938 /* If we don't know the alignment of the thing to which the symbol refers,
7939 we assume optimistically it is "enough".
7940 ??? maybe we should be pessimistic instead. */
7945 tree decl
= SYMBOL_REF_DECL (sym
);
7947 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
7948 /* The decl in an indirection symbol is the original one, which might
7949 be less aligned than the indirection. Our indirections are always
7954 if (decl
&& DECL_ALIGN (decl
))
7955 align
= DECL_ALIGN_UNIT (decl
);
7958 unsigned int extra
= 0;
7964 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7966 if (VECTOR_MEM_VSX_P (mode
))
7969 if (!TARGET_POWERPC64
)
7971 else if ((offset
& 3) || (align
& 3))
7982 if (!TARGET_POWERPC64
)
7984 else if ((offset
& 3) || (align
& 3))
7992 /* We only care if the access(es) would cause a change to the high part. */
7993 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
7994 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
7997 /* Return true if the MEM operand is a memory operand suitable for use
7998 with a (full width, possibly multiple) gpr load/store. On
7999 powerpc64 this means the offset must be divisible by 4.
8000 Implements 'Y' constraint.
8002 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8003 a constraint function we know the operand has satisfied a suitable
8006 Offsetting a lo_sum should not be allowed, except where we know by
8007 alignment that a 32k boundary is not crossed. Note that by
8008 "offsetting" here we mean a further offset to access parts of the
8009 MEM. It's fine to have a lo_sum where the inner address is offset
8010 from a sym, since the same sym+offset will appear in the high part
8011 of the address calculation. */
8014 mem_operand_gpr (rtx op
, machine_mode mode
)
8016 unsigned HOST_WIDE_INT offset
;
8018 rtx addr
= XEXP (op
, 0);
8020 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8022 && (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
8023 && mode_supports_pre_incdec_p (mode
)
8024 && legitimate_indirect_address_p (XEXP (addr
, 0), false))
8027 /* Allow prefixed instructions if supported. If the bottom two bits of the
8028 offset are non-zero, we could use a prefixed instruction (which does not
8029 have the DS-form constraint that the traditional instruction had) instead
8030 of forcing the unaligned offset to a GPR. */
8031 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8034 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8035 really OK. Doing this early avoids teaching all the other machinery
8037 if (TARGET_MACHO
&& GET_CODE (addr
) == LO_SUM
)
8038 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr
, 1), mode
);
8040 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8041 if (!rs6000_offsettable_memref_p (op
, mode
, false))
8044 op
= address_offset (addr
);
8048 offset
= INTVAL (op
);
8049 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8052 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8056 if (GET_CODE (addr
) == LO_SUM
)
8057 /* For lo_sum addresses, we must allow any offset except one that
8058 causes a wrap, so test only the low 16 bits. */
8059 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8061 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8064 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8065 enforce an offset divisible by 4 even for 32-bit. */
8068 mem_operand_ds_form (rtx op
, machine_mode mode
)
8070 unsigned HOST_WIDE_INT offset
;
8072 rtx addr
= XEXP (op
, 0);
8074 /* Allow prefixed instructions if supported. If the bottom two bits of the
8075 offset are non-zero, we could use a prefixed instruction (which does not
8076 have the DS-form constraint that the traditional instruction had) instead
8077 of forcing the unaligned offset to a GPR. */
8078 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8081 if (!offsettable_address_p (false, mode
, addr
))
8084 op
= address_offset (addr
);
8088 offset
= INTVAL (op
);
8089 if ((offset
& 3) != 0)
8092 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8096 if (GET_CODE (addr
) == LO_SUM
)
8097 /* For lo_sum addresses, we must allow any offset except one that
8098 causes a wrap, so test only the low 16 bits. */
8099 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8101 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8104 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8107 reg_offset_addressing_ok_p (machine_mode mode
)
8121 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8122 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8123 a vector mode, if we want to use the VSX registers to move it around,
8124 we need to restrict ourselves to reg+reg addressing. Similarly for
8125 IEEE 128-bit floating point that is passed in a single vector
8127 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8128 return mode_supports_dq_form (mode
);
8131 /* The vector pair/quad types support offset addressing if the
8132 underlying vectors support offset addressing. */
8138 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8139 addressing for the LFIWZX and STFIWX instructions. */
8140 if (TARGET_NO_SDMODE_STACK
)
8152 virtual_stack_registers_memory_p (rtx op
)
8157 regnum
= REGNO (op
);
8159 else if (GET_CODE (op
) == PLUS
8160 && REG_P (XEXP (op
, 0))
8161 && CONST_INT_P (XEXP (op
, 1)))
8162 regnum
= REGNO (XEXP (op
, 0));
8167 return (regnum
>= FIRST_VIRTUAL_REGISTER
8168 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8171 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8172 is known to not straddle a 32k boundary. This function is used
8173 to determine whether -mcmodel=medium code can use TOC pointer
8174 relative addressing for OP. This means the alignment of the TOC
8175 pointer must also be taken into account, and unfortunately that is
8178 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8179 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8183 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8187 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8189 if (!SYMBOL_REF_P (op
))
8192 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8194 if (mode_supports_dq_form (mode
))
8197 dsize
= GET_MODE_SIZE (mode
);
8198 decl
= SYMBOL_REF_DECL (op
);
8204 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8205 replacing memory addresses with an anchor plus offset. We
8206 could find the decl by rummaging around in the block->objects
8207 VEC for the given offset but that seems like too much work. */
8208 dalign
= BITS_PER_UNIT
;
8209 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8210 && SYMBOL_REF_ANCHOR_P (op
)
8211 && SYMBOL_REF_BLOCK (op
) != NULL
)
8213 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8215 dalign
= block
->alignment
;
8216 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8218 else if (CONSTANT_POOL_ADDRESS_P (op
))
8220 /* It would be nice to have get_pool_align().. */
8221 machine_mode cmode
= get_pool_mode (op
);
8223 dalign
= GET_MODE_ALIGNMENT (cmode
);
8226 else if (DECL_P (decl
))
8228 dalign
= DECL_ALIGN (decl
);
8232 /* Allow BLKmode when the entire object is known to not
8233 cross a 32k boundary. */
8234 if (!DECL_SIZE_UNIT (decl
))
8237 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8240 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8244 dalign
/= BITS_PER_UNIT
;
8245 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8246 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8247 return dalign
>= dsize
;
8253 /* Find how many bits of the alignment we know for this access. */
8254 dalign
/= BITS_PER_UNIT
;
8255 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8256 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8258 lsb
= offset
& -offset
;
8262 return dalign
>= dsize
;
8266 constant_pool_expr_p (rtx op
)
8270 split_const (op
, &base
, &offset
);
8271 return (SYMBOL_REF_P (base
)
8272 && CONSTANT_POOL_ADDRESS_P (base
)
8273 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8276 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8277 use that as the register to put the HIGH value into if register allocation
8281 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
8283 rtx tocrel
, tocreg
, hi
;
8285 gcc_assert (TARGET_TOC
);
8287 if (TARGET_DEBUG_ADDR
)
8289 if (SYMBOL_REF_P (symbol
))
8290 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8294 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
8295 GET_RTX_NAME (GET_CODE (symbol
)));
8300 if (!can_create_pseudo_p ())
8301 df_set_regs_ever_live (TOC_REGISTER
, true);
8303 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
8304 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
8305 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
8308 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
8309 if (largetoc_reg
!= NULL
)
8311 emit_move_insn (largetoc_reg
, hi
);
8314 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
8317 /* These are only used to pass through from print_operand/print_operand_address
8318 to rs6000_output_addr_const_extra over the intervening function
8319 output_addr_const which is not target code. */
8320 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
8322 /* Return true if OP is a toc pointer relative address (the output
8323 of create_TOC_reference). If STRICT, do not match non-split
8324 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8325 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8326 TOCREL_OFFSET_RET respectively. */
8329 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8330 const_rtx
*tocrel_offset_ret
)
8335 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8337 /* When strict ensure we have everything tidy. */
8339 && !(GET_CODE (op
) == LO_SUM
8340 && REG_P (XEXP (op
, 0))
8341 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8344 /* When not strict, allow non-split TOC addresses and also allow
8345 (lo_sum (high ..)) TOC addresses created during reload. */
8346 if (GET_CODE (op
) == LO_SUM
)
8350 const_rtx tocrel_base
= op
;
8351 const_rtx tocrel_offset
= const0_rtx
;
8353 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8355 tocrel_base
= XEXP (op
, 0);
8356 tocrel_offset
= XEXP (op
, 1);
8359 if (tocrel_base_ret
)
8360 *tocrel_base_ret
= tocrel_base
;
8361 if (tocrel_offset_ret
)
8362 *tocrel_offset_ret
= tocrel_offset
;
8364 return (GET_CODE (tocrel_base
) == UNSPEC
8365 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
8366 && REG_P (XVECEXP (tocrel_base
, 0, 1))
8367 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
8370 /* Return true if X is a constant pool address, and also for cmodel=medium
8371 if X is a toc-relative address known to be offsettable within MODE. */
8374 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8377 const_rtx tocrel_base
, tocrel_offset
;
8378 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
8379 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8380 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8382 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8383 INTVAL (tocrel_offset
), mode
)));
8387 legitimate_small_data_p (machine_mode mode
, rtx x
)
8389 return (DEFAULT_ABI
== ABI_V4
8390 && !flag_pic
&& !TARGET_TOC
8391 && (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
)
8392 && small_data_operand (x
, mode
));
8396 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8397 bool strict
, bool worst_case
)
8399 unsigned HOST_WIDE_INT offset
;
8402 if (GET_CODE (x
) != PLUS
)
8404 if (!REG_P (XEXP (x
, 0)))
8406 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8408 if (mode_supports_dq_form (mode
))
8409 return quad_address_p (x
, mode
, strict
);
8410 if (!reg_offset_addressing_ok_p (mode
))
8411 return virtual_stack_registers_memory_p (x
);
8412 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8414 if (!CONST_INT_P (XEXP (x
, 1)))
8417 offset
= INTVAL (XEXP (x
, 1));
8424 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8426 if (VECTOR_MEM_VSX_P (mode
))
8431 if (!TARGET_POWERPC64
)
8433 else if (offset
& 3)
8446 if (!TARGET_POWERPC64
)
8448 else if (offset
& 3)
8456 if (TARGET_PREFIXED
)
8457 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
8459 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8463 legitimate_indexed_address_p (rtx x
, int strict
)
8467 if (GET_CODE (x
) != PLUS
)
8473 return (REG_P (op0
) && REG_P (op1
)
8474 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
8475 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
8476 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
8477 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
8481 avoiding_indexed_address_p (machine_mode mode
)
8483 unsigned int msize
= GET_MODE_SIZE (mode
);
8485 /* Avoid indexed addressing for modes that have non-indexed load/store
8486 instruction forms. On power10, vector pairs have an indexed
8487 form, but vector quads don't. */
8491 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
8495 legitimate_indirect_address_p (rtx x
, int strict
)
8497 return REG_P (x
) && INT_REG_OK_FOR_BASE_P (x
, strict
);
8501 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
8503 if (!TARGET_MACHO
|| !flag_pic
8504 || mode
!= SImode
|| !MEM_P (x
))
8508 if (GET_CODE (x
) != LO_SUM
)
8510 if (!REG_P (XEXP (x
, 0)))
8512 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
8516 return CONSTANT_P (x
);
8520 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
8522 if (GET_CODE (x
) != LO_SUM
)
8524 if (!REG_P (XEXP (x
, 0)))
8526 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8528 /* quad word addresses are restricted, and we can't use LO_SUM. */
8529 if (mode_supports_dq_form (mode
))
8533 if (TARGET_ELF
|| TARGET_MACHO
)
8537 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
8539 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8540 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8541 recognizes some LO_SUM addresses as valid although this
8542 function says opposite. In most cases, LRA through different
8543 transformations can generate correct code for address reloads.
8544 It cannot manage only some LO_SUM cases. So we need to add
8545 code here saying that some addresses are still valid. */
8546 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
8547 && small_toc_ref (x
, VOIDmode
));
8548 if (TARGET_TOC
&& ! large_toc_ok
)
8550 if (GET_MODE_NUNITS (mode
) != 1)
8552 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
8553 && !(/* ??? Assume floating point reg based on mode? */
8554 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8557 return CONSTANT_P (x
) || large_toc_ok
;
8564 /* Try machine-dependent ways of modifying an illegitimate address
8565 to be legitimate. If we find one, return the new, valid address.
8566 This is used from only one place: `memory_address' in explow.c.
8568 OLDX is the address as it was before break_out_memory_refs was
8569 called. In some cases it is useful to look at this to decide what
8572 It is always safe for this function to do nothing. It exists to
8573 recognize opportunities to optimize the output.
8575 On RS/6000, first check for the sum of a register with a constant
8576 integer that is out of range. If so, generate code to add the
8577 constant with the low-order 16 bits masked to the register and force
8578 this result into another register (this can be done with `cau').
8579 Then generate an address of REG+(CONST&0xffff), allowing for the
8580 possibility of bit 16 being a one.
8582 Then check for the sum of a register and something not constant, try to
8583 load the other things into a register and return the sum. */
8586 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
8591 if (!reg_offset_addressing_ok_p (mode
)
8592 || mode_supports_dq_form (mode
))
8594 if (virtual_stack_registers_memory_p (x
))
8597 /* In theory we should not be seeing addresses of the form reg+0,
8598 but just in case it is generated, optimize it away. */
8599 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
8600 return force_reg (Pmode
, XEXP (x
, 0));
8602 /* For TImode with load/store quad, restrict addresses to just a single
8603 pointer, so it works with both GPRs and VSX registers. */
8604 /* Make sure both operands are registers. */
8605 else if (GET_CODE (x
) == PLUS
8606 && (mode
!= TImode
|| !TARGET_VSX
))
8607 return gen_rtx_PLUS (Pmode
,
8608 force_reg (Pmode
, XEXP (x
, 0)),
8609 force_reg (Pmode
, XEXP (x
, 1)));
8611 return force_reg (Pmode
, x
);
8613 if (SYMBOL_REF_P (x
))
8615 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
8617 return rs6000_legitimize_tls_address (x
, model
);
8629 /* As in legitimate_offset_address_p we do not assume
8630 worst-case. The mode here is just a hint as to the registers
8631 used. A TImode is usually in gprs, but may actually be in
8632 fprs. Leave worst-case scenario for reload to handle via
8633 insn constraints. PTImode is only GPRs. */
8640 if (GET_CODE (x
) == PLUS
8641 && REG_P (XEXP (x
, 0))
8642 && CONST_INT_P (XEXP (x
, 1))
8643 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
8644 >= 0x10000 - extra
))
8646 HOST_WIDE_INT high_int
, low_int
;
8648 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8649 if (low_int
>= 0x8000 - extra
)
8651 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
8652 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8653 gen_int_mode (high_int
, Pmode
)), 0);
8654 return plus_constant (Pmode
, sum
, low_int
);
8656 else if (GET_CODE (x
) == PLUS
8657 && REG_P (XEXP (x
, 0))
8658 && !CONST_INT_P (XEXP (x
, 1))
8659 && GET_MODE_NUNITS (mode
) == 1
8660 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
8661 || (/* ??? Assume floating point reg based on mode? */
8662 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8663 && !avoiding_indexed_address_p (mode
))
8665 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8666 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
8668 else if ((TARGET_ELF
8670 || !MACHO_DYNAMIC_NO_PIC_P
8674 && TARGET_NO_TOC_OR_PCREL
8677 && !CONST_WIDE_INT_P (x
)
8678 && !CONST_DOUBLE_P (x
)
8680 && GET_MODE_NUNITS (mode
) == 1
8681 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
8682 || (/* ??? Assume floating point reg based on mode? */
8683 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
8685 rtx reg
= gen_reg_rtx (Pmode
);
8687 emit_insn (gen_elf_high (reg
, x
));
8689 emit_insn (gen_macho_high (Pmode
, reg
, x
));
8690 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
8694 && constant_pool_expr_p (x
)
8695 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
8696 return create_TOC_reference (x
, NULL_RTX
);
8701 /* Debug version of rs6000_legitimize_address. */
8703 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
8709 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
8710 insns
= get_insns ();
8716 "\nrs6000_legitimize_address: mode %s, old code %s, "
8717 "new code %s, modified\n",
8718 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
8719 GET_RTX_NAME (GET_CODE (ret
)));
8721 fprintf (stderr
, "Original address:\n");
8724 fprintf (stderr
, "oldx:\n");
8727 fprintf (stderr
, "New address:\n");
8732 fprintf (stderr
, "Insns added:\n");
8733 debug_rtx_list (insns
, 20);
8739 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8740 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
8751 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8752 We need to emit DTP-relative relocations. */
8754 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
8756 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8761 fputs ("\t.long\t", file
);
8764 fputs (DOUBLE_INT_ASM_OP
, file
);
8769 output_addr_const (file
, x
);
8771 fputs ("@dtprel+0x8000", file
);
8772 else if (TARGET_XCOFF
&& SYMBOL_REF_P (x
))
8774 switch (SYMBOL_REF_TLS_MODEL (x
))
8778 case TLS_MODEL_LOCAL_EXEC
:
8779 fputs ("@le", file
);
8781 case TLS_MODEL_INITIAL_EXEC
:
8782 fputs ("@ie", file
);
8784 case TLS_MODEL_GLOBAL_DYNAMIC
:
8785 case TLS_MODEL_LOCAL_DYNAMIC
:
8794 /* Return true if X is a symbol that refers to real (rather than emulated)
8798 rs6000_real_tls_symbol_ref_p (rtx x
)
8800 return (SYMBOL_REF_P (x
)
8801 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
8804 /* In the name of slightly smaller debug output, and to cater to
8805 general assembler lossage, recognize various UNSPEC sequences
8806 and turn them back into a direct symbol reference. */
8809 rs6000_delegitimize_address (rtx orig_x
)
8813 if (GET_CODE (orig_x
) == UNSPEC
&& XINT (orig_x
, 1) == UNSPEC_FUSION_GPR
)
8814 orig_x
= XVECEXP (orig_x
, 0, 0);
8816 orig_x
= delegitimize_mem_from_attrs (orig_x
);
8823 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
8827 if (GET_CODE (y
) == PLUS
8828 && GET_MODE (y
) == Pmode
8829 && CONST_INT_P (XEXP (y
, 1)))
8831 offset
= XEXP (y
, 1);
8835 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
8837 y
= XVECEXP (y
, 0, 0);
8840 /* Do not associate thread-local symbols with the original
8841 constant pool symbol. */
8844 && CONSTANT_POOL_ADDRESS_P (y
)
8845 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
8849 if (offset
!= NULL_RTX
)
8850 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
8851 if (!MEM_P (orig_x
))
8854 return replace_equiv_address_nv (orig_x
, y
);
8858 && GET_CODE (orig_x
) == LO_SUM
8859 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
8861 y
= XEXP (XEXP (orig_x
, 1), 0);
8862 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8863 return XVECEXP (y
, 0, 0);
8869 /* Return true if X shouldn't be emitted into the debug info.
8870 The linker doesn't like .toc section references from
8871 .debug_* sections, so reject .toc section symbols. */
8874 rs6000_const_not_ok_for_debug_p (rtx x
)
8876 if (GET_CODE (x
) == UNSPEC
)
8878 if (SYMBOL_REF_P (x
)
8879 && CONSTANT_POOL_ADDRESS_P (x
))
8881 rtx c
= get_pool_constant (x
);
8882 machine_mode cmode
= get_pool_mode (x
);
8883 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
8890 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8893 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
8895 int icode
= INSN_CODE (insn
);
8897 /* Reject creating doloop insns. Combine should not be allowed
8898 to create these for a number of reasons:
8899 1) In a nested loop, if combine creates one of these in an
8900 outer loop and the register allocator happens to allocate ctr
8901 to the outer loop insn, then the inner loop can't use ctr.
8902 Inner loops ought to be more highly optimized.
8903 2) Combine often wants to create one of these from what was
8904 originally a three insn sequence, first combining the three
8905 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8906 allocated ctr, the splitter takes use back to the three insn
8907 sequence. It's better to stop combine at the two insn
8909 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8910 insns, the register allocator sometimes uses floating point
8911 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8912 jump insn and output reloads are not implemented for jumps,
8913 the ctrsi/ctrdi splitters need to handle all possible cases.
8914 That's a pain, and it gets to be seriously difficult when a
8915 splitter that runs after reload needs memory to transfer from
8916 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8917 for the difficult case. It's better to not create problems
8918 in the first place. */
8919 if (icode
!= CODE_FOR_nothing
8920 && (icode
== CODE_FOR_bdz_si
8921 || icode
== CODE_FOR_bdz_di
8922 || icode
== CODE_FOR_bdnz_si
8923 || icode
== CODE_FOR_bdnz_di
8924 || icode
== CODE_FOR_bdztf_si
8925 || icode
== CODE_FOR_bdztf_di
8926 || icode
== CODE_FOR_bdnztf_si
8927 || icode
== CODE_FOR_bdnztf_di
))
8933 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8935 static GTY(()) rtx rs6000_tls_symbol
;
8937 rs6000_tls_get_addr (void)
8939 if (!rs6000_tls_symbol
)
8940 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
8942 return rs6000_tls_symbol
;
8945 /* Construct the SYMBOL_REF for TLS GOT references. */
8947 static GTY(()) rtx rs6000_got_symbol
;
8949 rs6000_got_sym (void)
8951 if (!rs6000_got_symbol
)
8953 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
8954 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
8955 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
8958 return rs6000_got_symbol
;
8961 /* AIX Thread-Local Address support. */
8964 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
8966 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
, tlsaddr
;
8970 name
= XSTR (addr
, 0);
8971 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8972 or the symbol will be in TLS private data section. */
8973 if (name
[strlen (name
) - 1] != ']'
8974 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr
))
8975 || bss_initializer_p (SYMBOL_REF_DECL (addr
))))
8977 tlsname
= XALLOCAVEC (char, strlen (name
) + 4);
8978 strcpy (tlsname
, name
);
8980 bss_initializer_p (SYMBOL_REF_DECL (addr
)) ? "[UL]" : "[TL]");
8981 tlsaddr
= copy_rtx (addr
);
8982 XSTR (tlsaddr
, 0) = ggc_strdup (tlsname
);
8987 /* Place addr into TOC constant pool. */
8988 sym
= force_const_mem (GET_MODE (tlsaddr
), tlsaddr
);
8990 /* Output the TOC entry and create the MEM referencing the value. */
8991 if (constant_pool_expr_p (XEXP (sym
, 0))
8992 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
8994 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
8995 mem
= gen_const_mem (Pmode
, tocref
);
8996 set_mem_alias_set (mem
, get_TOC_alias_set ());
9001 /* Use global-dynamic for local-dynamic. */
9002 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9003 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9005 /* Create new TOC reference for @m symbol. */
9006 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9007 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9008 strcpy (tlsname
, "*LCM");
9009 strcat (tlsname
, name
+ 3);
9010 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9011 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9012 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9013 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9014 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9016 rtx modreg
= gen_reg_rtx (Pmode
);
9017 emit_insn (gen_rtx_SET (modreg
, modmem
));
9019 tmpreg
= gen_reg_rtx (Pmode
);
9020 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9022 dest
= gen_reg_rtx (Pmode
);
9024 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9026 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9029 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9030 else if (TARGET_32BIT
)
9032 tlsreg
= gen_reg_rtx (SImode
);
9033 emit_insn (gen_tls_get_tpointer (tlsreg
));
9036 tlsreg
= gen_rtx_REG (DImode
, 13);
9038 /* Load the TOC value into temporary register. */
9039 tmpreg
= gen_reg_rtx (Pmode
);
9040 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9041 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9042 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9044 /* Add TOC symbol value to TLS pointer. */
9045 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9050 /* Passes the tls arg value for global dynamic and local dynamic
9051 emit_library_call_value in rs6000_legitimize_tls_address to
9052 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9053 marker relocs put on __tls_get_addr calls. */
9054 static rtx global_tlsarg
;
9056 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9057 this (thread-local) address. */
9060 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9065 return rs6000_legitimize_tls_address_aix (addr
, model
);
9067 dest
= gen_reg_rtx (Pmode
);
9068 if (model
== TLS_MODEL_LOCAL_EXEC
9069 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9075 tlsreg
= gen_rtx_REG (Pmode
, 13);
9076 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9080 tlsreg
= gen_rtx_REG (Pmode
, 2);
9081 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9085 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9089 tmp
= gen_reg_rtx (Pmode
);
9092 tlsreg
= gen_rtx_REG (Pmode
, 13);
9093 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9097 tlsreg
= gen_rtx_REG (Pmode
, 2);
9098 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9102 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9104 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9109 rtx got
, tga
, tmp1
, tmp2
;
9111 /* We currently use relocations like @got@tlsgd for tls, which
9112 means the linker will handle allocation of tls entries, placing
9113 them in the .got section. So use a pointer to the .got section,
9114 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9115 or to secondary GOT sections used by 32-bit -fPIC. */
9116 if (rs6000_pcrel_p ())
9118 else if (TARGET_64BIT
)
9119 got
= gen_rtx_REG (Pmode
, 2);
9123 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9126 rtx gsym
= rs6000_got_sym ();
9127 got
= gen_reg_rtx (Pmode
);
9129 rs6000_emit_move (got
, gsym
, Pmode
);
9134 tmp1
= gen_reg_rtx (Pmode
);
9135 tmp2
= gen_reg_rtx (Pmode
);
9136 mem
= gen_const_mem (Pmode
, tmp1
);
9137 lab
= gen_label_rtx ();
9138 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9139 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9140 if (TARGET_LINK_STACK
)
9141 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9142 emit_move_insn (tmp2
, mem
);
9143 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9144 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9149 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9151 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9153 tga
= rs6000_tls_get_addr ();
9154 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9155 emit_insn (gen_rtx_SET (argreg
, arg
));
9156 global_tlsarg
= arg
;
9157 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9158 global_tlsarg
= NULL_RTX
;
9160 /* Make a note so that the result of this call can be CSEd. */
9161 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9162 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9163 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9165 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9167 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9168 tga
= rs6000_tls_get_addr ();
9169 tmp1
= gen_reg_rtx (Pmode
);
9170 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9171 emit_insn (gen_rtx_SET (argreg
, arg
));
9172 global_tlsarg
= arg
;
9173 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9174 global_tlsarg
= NULL_RTX
;
9176 /* Make a note so that the result of this call can be CSEd. */
9177 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9178 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9179 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9181 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9184 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9186 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9188 else if (rs6000_tls_size
== 32)
9190 tmp2
= gen_reg_rtx (Pmode
);
9192 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9194 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9197 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9199 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9203 tmp2
= gen_reg_rtx (Pmode
);
9205 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9207 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9209 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9215 /* IE, or 64-bit offset LE. */
9216 tmp2
= gen_reg_rtx (Pmode
);
9218 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9220 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9222 if (rs6000_pcrel_p ())
9225 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9227 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9229 else if (TARGET_64BIT
)
9230 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9232 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9240 /* Only create the global variable for the stack protect guard if we are using
9241 the global flavor of that guard. */
9243 rs6000_init_stack_protect_guard (void)
9245 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9246 return default_stack_protect_guard ();
9251 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9254 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9256 if (GET_CODE (x
) == HIGH
9257 && GET_CODE (XEXP (x
, 0)) == UNSPEC
)
9260 /* A TLS symbol in the TOC cannot contain a sum. */
9261 if (GET_CODE (x
) == CONST
9262 && GET_CODE (XEXP (x
, 0)) == PLUS
9263 && SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
9264 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9267 /* Do not place an ELF TLS symbol in the constant pool. */
9268 return TARGET_ELF
&& tls_referenced_p (x
);
9271 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9272 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9273 can be addressed relative to the toc pointer. */
9276 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9278 return ((constant_pool_expr_p (sym
)
9279 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9280 get_pool_mode (sym
)))
9281 || (TARGET_CMODEL
== CMODEL_MEDIUM
9282 && SYMBOL_REF_LOCAL_P (sym
)
9283 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9286 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9287 that is a valid memory address for an instruction.
9288 The MODE argument is the machine mode for the MEM expression
9289 that wants to use this address.
9291 On the RS/6000, there are four valid address: a SYMBOL_REF that
9292 refers to a constant pool entry of an address (or the sum of it
9293 plus a constant), a short (16-bit signed) constant plus a register,
9294 the sum of two registers, or a register indirect, possibly with an
9295 auto-increment. For DFmode, DDmode and DImode with a constant plus
9296 register, we must ensure that both words are addressable or PowerPC64
9297 with offset word aligned.
9299 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9300 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9301 because adjacent memory cells are accessed by adding word-sized offsets
9302 during assembly output. */
9304 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
9306 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9307 bool quad_offset_p
= mode_supports_dq_form (mode
);
9309 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9312 /* Handle unaligned altivec lvx/stvx type addresses. */
9313 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9314 && GET_CODE (x
) == AND
9315 && CONST_INT_P (XEXP (x
, 1))
9316 && INTVAL (XEXP (x
, 1)) == -16)
9319 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9320 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9321 || virtual_stack_registers_memory_p (x
));
9324 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9327 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9328 && mode_supports_pre_incdec_p (mode
)
9329 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9332 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9333 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9336 /* Handle restricted vector d-form offsets in ISA 3.0. */
9339 if (quad_address_p (x
, mode
, reg_ok_strict
))
9342 else if (virtual_stack_registers_memory_p (x
))
9345 else if (reg_offset_p
)
9347 if (legitimate_small_data_p (mode
, x
))
9349 if (legitimate_constant_pool_address_p (x
, mode
,
9350 reg_ok_strict
|| lra_in_progress
))
9354 /* For TImode, if we have TImode in VSX registers, only allow register
9355 indirect addresses. This will allow the values to go in either GPRs
9356 or VSX registers without reloading. The vector types would tend to
9357 go into VSX registers, so we allow REG+REG, while TImode seems
9358 somewhat split, in that some uses are GPR based, and some VSX based. */
9359 /* FIXME: We could loosen this by changing the following to
9360 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9361 but currently we cannot allow REG+REG addressing for TImode. See
9362 PR72827 for complete details on how this ends up hoodwinking DSE. */
9363 if (mode
== TImode
&& TARGET_VSX
)
9365 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9368 && GET_CODE (x
) == PLUS
9369 && REG_P (XEXP (x
, 0))
9370 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9371 || XEXP (x
, 0) == arg_pointer_rtx
)
9372 && CONST_INT_P (XEXP (x
, 1)))
9374 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
9376 if (!FLOAT128_2REG_P (mode
)
9377 && (TARGET_HARD_FLOAT
9379 || (mode
!= DFmode
&& mode
!= DDmode
))
9380 && (TARGET_POWERPC64
|| mode
!= DImode
)
9381 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
9383 && !avoiding_indexed_address_p (mode
)
9384 && legitimate_indexed_address_p (x
, reg_ok_strict
))
9386 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
9387 && mode_supports_pre_modify_p (mode
)
9388 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
9389 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
9390 reg_ok_strict
, false)
9391 || (!avoiding_indexed_address_p (mode
)
9392 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
9393 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
9395 /* There is no prefixed version of the load/store with update. */
9396 rtx addr
= XEXP (x
, 1);
9397 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
9399 if (reg_offset_p
&& !quad_offset_p
9400 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
9405 /* Debug version of rs6000_legitimate_address_p. */
9407 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
9410 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
9412 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9413 "strict = %d, reload = %s, code = %s\n",
9414 ret
? "true" : "false",
9415 GET_MODE_NAME (mode
),
9417 (reload_completed
? "after" : "before"),
9418 GET_RTX_NAME (GET_CODE (x
)));
9424 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9427 rs6000_mode_dependent_address_p (const_rtx addr
,
9428 addr_space_t as ATTRIBUTE_UNUSED
)
9430 return rs6000_mode_dependent_address_ptr (addr
);
9433 /* Go to LABEL if ADDR (a legitimate address expression)
9434 has an effect that depends on the machine mode it is used for.
9436 On the RS/6000 this is true of all integral offsets (since AltiVec
9437 and VSX modes don't allow them) or is a pre-increment or decrement.
9439 ??? Except that due to conceptual problems in offsettable_address_p
9440 we can't really report the problems of integral offsets. So leave
9441 this assuming that the adjustable offset must be valid for the
9442 sub-words of a TFmode operand, which is what we had before. */
9445 rs6000_mode_dependent_address (const_rtx addr
)
9447 switch (GET_CODE (addr
))
9450 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9451 is considered a legitimate address before reload, so there
9452 are no offset restrictions in that case. Note that this
9453 condition is safe in strict mode because any address involving
9454 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9455 been rejected as illegitimate. */
9456 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
9457 && XEXP (addr
, 0) != arg_pointer_rtx
9458 && CONST_INT_P (XEXP (addr
, 1)))
9460 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
9461 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
9462 if (TARGET_PREFIXED
)
9463 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
9465 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
9470 /* Anything in the constant pool is sufficiently aligned that
9471 all bytes have the same high part address. */
9472 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
9474 /* Auto-increment cases are now treated generically in recog.c. */
9476 return TARGET_UPDATE
;
9478 /* AND is only allowed in Altivec loads. */
9489 /* Debug version of rs6000_mode_dependent_address. */
9491 rs6000_debug_mode_dependent_address (const_rtx addr
)
9493 bool ret
= rs6000_mode_dependent_address (addr
);
9495 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
9496 ret
? "true" : "false");
9502 /* Implement FIND_BASE_TERM. */
9505 rs6000_find_base_term (rtx op
)
9510 if (GET_CODE (base
) == CONST
)
9511 base
= XEXP (base
, 0);
9512 if (GET_CODE (base
) == PLUS
)
9513 base
= XEXP (base
, 0);
9514 if (GET_CODE (base
) == UNSPEC
)
9515 switch (XINT (base
, 1))
9518 case UNSPEC_MACHOPIC_OFFSET
:
9519 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9520 for aliasing purposes. */
9521 return XVECEXP (base
, 0, 0);
9527 /* More elaborate version of recog's offsettable_memref_p predicate
9528 that works around the ??? note of rs6000_mode_dependent_address.
9529 In particular it accepts
9531 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9533 in 32-bit mode, that the recog predicate rejects. */
9536 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
9543 /* First mimic offsettable_memref_p. */
9544 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
9547 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9548 the latter predicate knows nothing about the mode of the memory
9549 reference and, therefore, assumes that it is the largest supported
9550 mode (TFmode). As a consequence, legitimate offsettable memory
9551 references are rejected. rs6000_legitimate_offset_address_p contains
9552 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9553 at least with a little bit of help here given that we know the
9554 actual registers used. */
9555 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
9556 || GET_MODE_SIZE (reg_mode
) == 4);
9557 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
9558 strict
, worst_case
);
9561 /* Determine the reassociation width to be used in reassociate_bb.
9562 This takes into account how many parallel operations we
9563 can actually do of a given type, and also the latency.
9567 vect add/sub/mul 2/cycle
9568 fp add/sub/mul 2/cycle
9573 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
9576 switch (rs6000_tune
)
9578 case PROCESSOR_POWER8
:
9579 case PROCESSOR_POWER9
:
9580 case PROCESSOR_POWER10
:
9581 if (DECIMAL_FLOAT_MODE_P (mode
))
9583 if (VECTOR_MODE_P (mode
))
9585 if (INTEGRAL_MODE_P (mode
))
9587 if (FLOAT_MODE_P (mode
))
9596 /* Change register usage conditional on target flags. */
9598 rs6000_conditional_register_usage (void)
9602 if (TARGET_DEBUG_TARGET
)
9603 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
9605 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9607 fixed_regs
[13] = call_used_regs
[13] = 1;
9609 /* Conditionally disable FPRs. */
9610 if (TARGET_SOFT_FLOAT
)
9611 for (i
= 32; i
< 64; i
++)
9612 fixed_regs
[i
] = call_used_regs
[i
] = 1;
9614 /* The TOC register is not killed across calls in a way that is
9615 visible to the compiler. */
9616 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9617 call_used_regs
[2] = 0;
9619 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
9620 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9622 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
9623 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
9624 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9626 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
9627 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
9628 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9630 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
9631 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
9633 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
9635 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
9636 fixed_regs
[i
] = call_used_regs
[i
] = 1;
9637 call_used_regs
[VRSAVE_REGNO
] = 1;
9640 if (TARGET_ALTIVEC
|| TARGET_VSX
)
9641 global_regs
[VSCR_REGNO
] = 1;
9643 if (TARGET_ALTIVEC_ABI
)
9645 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
9646 call_used_regs
[i
] = 1;
9648 /* AIX reserves VR20:31 in non-extended ABI mode. */
9650 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
9651 fixed_regs
[i
] = call_used_regs
[i
] = 1;
9656 /* Output insns to set DEST equal to the constant SOURCE as a series of
9657 lis, ori and shl instructions and return TRUE. */
9660 rs6000_emit_set_const (rtx dest
, rtx source
)
9662 machine_mode mode
= GET_MODE (dest
);
9667 gcc_checking_assert (CONST_INT_P (source
));
9668 c
= INTVAL (source
);
9673 emit_insn (gen_rtx_SET (dest
, source
));
9677 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
9679 emit_insn (gen_rtx_SET (copy_rtx (temp
),
9680 GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
9681 emit_insn (gen_rtx_SET (dest
,
9682 gen_rtx_IOR (SImode
, copy_rtx (temp
),
9683 GEN_INT (c
& 0xffff))));
9687 if (!TARGET_POWERPC64
)
9691 hi
= operand_subword_force (copy_rtx (dest
), WORDS_BIG_ENDIAN
== 0,
9693 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0,
9695 emit_move_insn (hi
, GEN_INT (c
>> 32));
9696 c
= ((c
& 0xffffffff) ^ 0x80000000) - 0x80000000;
9697 emit_move_insn (lo
, GEN_INT (c
));
9700 rs6000_emit_set_long_const (dest
, c
);
9707 insn
= get_last_insn ();
9708 set
= single_set (insn
);
9709 if (! CONSTANT_P (SET_SRC (set
)))
9710 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
9715 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9716 Output insns to set DEST equal to the constant C as a series of
9717 lis, ori and shl instructions. */
9720 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
9723 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
9733 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
9734 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
9735 emit_move_insn (dest
, GEN_INT ((ud1
^ 0x8000) - 0x8000));
9737 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
9738 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
9740 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9742 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
9743 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
9745 emit_move_insn (dest
,
9746 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9749 else if (ud3
== 0 && ud4
== 0)
9751 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9753 gcc_assert (ud2
& 0x8000);
9754 emit_move_insn (copy_rtx (temp
),
9755 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
9757 emit_move_insn (copy_rtx (temp
),
9758 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9760 emit_move_insn (dest
,
9761 gen_rtx_ZERO_EXTEND (DImode
,
9762 gen_lowpart (SImode
,
9765 else if (ud1
== ud3
&& ud2
== ud4
)
9767 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9768 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
9769 rs6000_emit_set_long_const (temp
, (num
^ 0x80000000) - 0x80000000);
9770 rtx one
= gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff));
9771 rtx two
= gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (32));
9772 emit_move_insn (dest
, gen_rtx_IOR (DImode
, one
, two
));
9774 else if ((ud4
== 0xffff && (ud3
& 0x8000))
9775 || (ud4
== 0 && ! (ud3
& 0x8000)))
9777 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9779 emit_move_insn (copy_rtx (temp
),
9780 GEN_INT (((ud3
<< 16) ^ 0x80000000) - 0x80000000));
9782 emit_move_insn (copy_rtx (temp
),
9783 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9785 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
9786 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
9789 emit_move_insn (dest
,
9790 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9795 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
9797 emit_move_insn (copy_rtx (temp
),
9798 GEN_INT (((ud4
<< 16) ^ 0x80000000) - 0x80000000));
9800 emit_move_insn (copy_rtx (temp
),
9801 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9804 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? copy_rtx (temp
) : dest
,
9805 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
9808 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
9809 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9810 GEN_INT (ud2
<< 16)));
9812 emit_move_insn (dest
,
9813 gen_rtx_IOR (DImode
, copy_rtx (temp
),
9818 /* Helper for the following. Get rid of [r+r] memory refs
9819 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9822 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
9824 if (MEM_P (operands
[0])
9825 && !REG_P (XEXP (operands
[0], 0))
9826 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
9827 GET_MODE (operands
[0]), false))
9829 = replace_equiv_address (operands
[0],
9830 copy_addr_to_reg (XEXP (operands
[0], 0)));
9832 if (MEM_P (operands
[1])
9833 && !REG_P (XEXP (operands
[1], 0))
9834 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
9835 GET_MODE (operands
[1]), false))
9837 = replace_equiv_address (operands
[1],
9838 copy_addr_to_reg (XEXP (operands
[1], 0)));
9841 /* Generate a vector of constants to permute MODE for a little-endian
9842 storage operation by swapping the two halves of a vector. */
9844 rs6000_const_vec (machine_mode mode
)
9872 v
= rtvec_alloc (subparts
);
9874 for (i
= 0; i
< subparts
/ 2; ++i
)
9875 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
9876 for (i
= subparts
/ 2; i
< subparts
; ++i
)
9877 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
9882 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9885 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
9887 /* Scalar permutations are easier to express in integer modes rather than
9888 floating-point modes, so cast them here. We use V1TImode instead
9889 of TImode to ensure that the values don't go through GPRs. */
9890 if (FLOAT128_VECTOR_P (mode
))
9892 dest
= gen_lowpart (V1TImode
, dest
);
9893 source
= gen_lowpart (V1TImode
, source
);
9897 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9899 if (mode
== TImode
|| mode
== V1TImode
)
9900 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
9904 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
9905 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
9909 /* Emit a little-endian load from vector memory location SOURCE to VSX
9910 register DEST in mode MODE. The load is done with two permuting
9911 insn's that represent an lxvd2x and xxpermdi. */
9913 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
9915 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
9917 if (mode
== TImode
|| mode
== V1TImode
)
9920 dest
= gen_lowpart (V2DImode
, dest
);
9921 source
= adjust_address (source
, V2DImode
, 0);
9924 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
9925 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
9926 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
9929 /* Emit a little-endian store to vector memory location DEST from VSX
9930 register SOURCE in mode MODE. The store is done with two permuting
9931 insn's that represent an xxpermdi and an stxvd2x. */
9933 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
9935 /* This should never be called during or after LRA, because it does
9936 not re-permute the source register. It is intended only for use
9938 gcc_assert (!lra_in_progress
&& !reload_completed
);
9940 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9942 if (mode
== TImode
|| mode
== V1TImode
)
9945 dest
= adjust_address (dest
, V2DImode
, 0);
9946 source
= gen_lowpart (V2DImode
, source
);
9949 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source
) : source
;
9950 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
9951 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
9954 /* Emit a sequence representing a little-endian VSX load or store,
9955 moving data from SOURCE to DEST in mode MODE. This is done
9956 separately from rs6000_emit_move to ensure it is called only
9957 during expand. LE VSX loads and stores introduced later are
9958 handled with a split. The expand-time RTL generation allows
9959 us to optimize away redundant pairs of register-permutes. */
9961 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
9963 gcc_assert (!BYTES_BIG_ENDIAN
9964 && VECTOR_MEM_VSX_P (mode
)
9965 && !TARGET_P9_VECTOR
9966 && !gpr_or_gpr_p (dest
, source
)
9967 && (MEM_P (source
) ^ MEM_P (dest
)));
9971 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
9972 rs6000_emit_le_vsx_load (dest
, source
, mode
);
9976 if (!REG_P (source
))
9977 source
= force_reg (mode
, source
);
9978 rs6000_emit_le_vsx_store (dest
, source
, mode
);
9982 /* Return whether a SFmode or SImode move can be done without converting one
9983 mode to another. This arrises when we have:
9985 (SUBREG:SF (REG:SI ...))
9986 (SUBREG:SI (REG:SF ...))
9988 and one of the values is in a floating point/vector register, where SFmode
9989 scalars are stored in DFmode format. */
9992 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
9994 if (TARGET_ALLOW_SF_SUBREG
)
9997 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10000 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10003 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10004 if (SUBREG_P (dest
))
10006 rtx dest_subreg
= SUBREG_REG (dest
);
10007 rtx src_subreg
= SUBREG_REG (src
);
10008 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10015 /* Helper function to change moves with:
10017 (SUBREG:SF (REG:SI)) and
10018 (SUBREG:SI (REG:SF))
10020 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10021 values are stored as DFmode values in the VSX registers. We need to convert
10022 the bits before we can use a direct move or operate on the bits in the
10023 vector register as an integer type.
10025 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10028 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10030 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10031 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10032 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10034 rtx inner_source
= SUBREG_REG (source
);
10035 machine_mode inner_mode
= GET_MODE (inner_source
);
10037 if (mode
== SImode
&& inner_mode
== SFmode
)
10039 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10043 if (mode
== SFmode
&& inner_mode
== SImode
)
10045 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10053 /* Emit a move from SOURCE to DEST in mode MODE. */
10055 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10058 operands
[0] = dest
;
10059 operands
[1] = source
;
10061 if (TARGET_DEBUG_ADDR
)
10064 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10065 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10066 GET_MODE_NAME (mode
),
10069 can_create_pseudo_p ());
10071 fprintf (stderr
, "source:\n");
10072 debug_rtx (source
);
10075 /* Check that we get CONST_WIDE_INT only when we should. */
10076 if (CONST_WIDE_INT_P (operands
[1])
10077 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10078 gcc_unreachable ();
10080 #ifdef HAVE_AS_GNU_ATTRIBUTE
10081 /* If we use a long double type, set the flags in .gnu_attribute that say
10082 what the long double type is. This is to allow the linker's warning
10083 message for the wrong long double to be useful, even if the function does
10084 not do a call (for example, doing a 128-bit add on power9 if the long
10085 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10086 used if they aren't the default long dobule type. */
10087 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10089 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10090 rs6000_passes_float
= rs6000_passes_long_double
= true;
10092 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10093 rs6000_passes_float
= rs6000_passes_long_double
= true;
10097 /* See if we need to special case SImode/SFmode SUBREG moves. */
10098 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10099 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10102 /* Check if GCC is setting up a block move that will end up using FP
10103 registers as temporaries. We must make sure this is acceptable. */
10104 if (MEM_P (operands
[0])
10105 && MEM_P (operands
[1])
10107 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10108 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10109 && ! (rs6000_slow_unaligned_access (SImode
,
10110 (MEM_ALIGN (operands
[0]) > 32
10111 ? 32 : MEM_ALIGN (operands
[0])))
10112 || rs6000_slow_unaligned_access (SImode
,
10113 (MEM_ALIGN (operands
[1]) > 32
10114 ? 32 : MEM_ALIGN (operands
[1]))))
10115 && ! MEM_VOLATILE_P (operands
[0])
10116 && ! MEM_VOLATILE_P (operands
[1]))
10118 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10119 adjust_address (operands
[1], SImode
, 0));
10120 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10121 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10125 if (can_create_pseudo_p () && MEM_P (operands
[0])
10126 && !gpc_reg_operand (operands
[1], mode
))
10127 operands
[1] = force_reg (mode
, operands
[1]);
10129 /* Recognize the case where operand[1] is a reference to thread-local
10130 data and load its address to a register. */
10131 if (tls_referenced_p (operands
[1]))
10133 enum tls_model model
;
10134 rtx tmp
= operands
[1];
10137 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10139 addend
= XEXP (XEXP (tmp
, 0), 1);
10140 tmp
= XEXP (XEXP (tmp
, 0), 0);
10143 gcc_assert (SYMBOL_REF_P (tmp
));
10144 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10145 gcc_assert (model
!= 0);
10147 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10150 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10151 tmp
= force_operand (tmp
, operands
[0]);
10156 /* 128-bit constant floating-point values on Darwin should really be loaded
10157 as two parts. However, this premature splitting is a problem when DFmode
10158 values can go into Altivec registers. */
10159 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
10160 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
10162 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10163 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10165 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10166 GET_MODE_SIZE (DFmode
)),
10167 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10168 GET_MODE_SIZE (DFmode
)),
10173 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10174 p1:SD) if p1 is not of floating point class and p0 is spilled as
10175 we can have no analogous movsd_store for this. */
10176 if (lra_in_progress
&& mode
== DDmode
10177 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10178 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10179 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10180 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10183 int regno
= REGNO (SUBREG_REG (operands
[1]));
10185 if (!HARD_REGISTER_NUM_P (regno
))
10187 cl
= reg_preferred_class (regno
);
10188 regno
= reg_renumber
[regno
];
10190 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10192 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10195 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10196 operands
[1] = SUBREG_REG (operands
[1]);
10199 if (lra_in_progress
10201 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10202 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10203 && (REG_P (operands
[1])
10204 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10206 int regno
= reg_or_subregno (operands
[1]);
10209 if (!HARD_REGISTER_NUM_P (regno
))
10211 cl
= reg_preferred_class (regno
);
10212 gcc_assert (cl
!= NO_REGS
);
10213 regno
= reg_renumber
[regno
];
10215 regno
= ira_class_hard_regs
[cl
][0];
10217 if (FP_REGNO_P (regno
))
10219 if (GET_MODE (operands
[0]) != DDmode
)
10220 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10221 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10223 else if (INT_REGNO_P (regno
))
10224 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10229 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10230 p:DD)) if p0 is not of floating point class and p1 is spilled as
10231 we can have no analogous movsd_load for this. */
10232 if (lra_in_progress
&& mode
== DDmode
10233 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10234 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10235 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10236 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10239 int regno
= REGNO (SUBREG_REG (operands
[0]));
10241 if (!HARD_REGISTER_NUM_P (regno
))
10243 cl
= reg_preferred_class (regno
);
10244 regno
= reg_renumber
[regno
];
10246 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10248 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10251 operands
[0] = SUBREG_REG (operands
[0]);
10252 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10255 if (lra_in_progress
10257 && (REG_P (operands
[0])
10258 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10259 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10260 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10262 int regno
= reg_or_subregno (operands
[0]);
10265 if (!HARD_REGISTER_NUM_P (regno
))
10267 cl
= reg_preferred_class (regno
);
10268 gcc_assert (cl
!= NO_REGS
);
10269 regno
= reg_renumber
[regno
];
10271 regno
= ira_class_hard_regs
[cl
][0];
10273 if (FP_REGNO_P (regno
))
10275 if (GET_MODE (operands
[1]) != DDmode
)
10276 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10277 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10279 else if (INT_REGNO_P (regno
))
10280 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10286 /* FIXME: In the long term, this switch statement should go away
10287 and be replaced by a sequence of tests based on things like
10293 if (CONSTANT_P (operands
[1])
10294 && !CONST_INT_P (operands
[1]))
10295 operands
[1] = force_const_mem (mode
, operands
[1]);
10302 if (FLOAT128_2REG_P (mode
))
10303 rs6000_eliminate_indexed_memrefs (operands
);
10310 if (CONSTANT_P (operands
[1])
10311 && ! easy_fp_constant (operands
[1], mode
))
10312 operands
[1] = force_const_mem (mode
, operands
[1]);
10322 if (CONSTANT_P (operands
[1])
10323 && !easy_vector_constant (operands
[1], mode
))
10324 operands
[1] = force_const_mem (mode
, operands
[1]);
10329 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
10330 error ("%qs is an opaque type, and you can't set it to other values.",
10331 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
10336 /* Use default pattern for address of ELF small data */
10339 && DEFAULT_ABI
== ABI_V4
10340 && (SYMBOL_REF_P (operands
[1])
10341 || GET_CODE (operands
[1]) == CONST
)
10342 && small_data_operand (operands
[1], mode
))
10344 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10348 /* Use the default pattern for loading up PC-relative addresses. */
10349 if (TARGET_PCREL
&& mode
== Pmode
10350 && pcrel_local_or_external_address (operands
[1], Pmode
))
10352 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10356 if (DEFAULT_ABI
== ABI_V4
10357 && mode
== Pmode
&& mode
== SImode
10358 && flag_pic
== 1 && got_operand (operands
[1], mode
))
10360 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
10364 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
10365 && TARGET_NO_TOC_OR_PCREL
10368 && CONSTANT_P (operands
[1])
10369 && GET_CODE (operands
[1]) != HIGH
10370 && !CONST_INT_P (operands
[1]))
10372 rtx target
= (!can_create_pseudo_p ()
10374 : gen_reg_rtx (mode
));
10376 /* If this is a function address on -mcall-aixdesc,
10377 convert it to the address of the descriptor. */
10378 if (DEFAULT_ABI
== ABI_AIX
10379 && SYMBOL_REF_P (operands
[1])
10380 && XSTR (operands
[1], 0)[0] == '.')
10382 const char *name
= XSTR (operands
[1], 0);
10384 while (*name
== '.')
10386 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
10387 CONSTANT_POOL_ADDRESS_P (new_ref
)
10388 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
10389 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
10390 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
10391 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
10392 operands
[1] = new_ref
;
10395 if (DEFAULT_ABI
== ABI_DARWIN
)
10398 /* This is not PIC code, but could require the subset of
10399 indirections used by mdynamic-no-pic. */
10400 if (MACHO_DYNAMIC_NO_PIC_P
)
10402 /* Take care of any required data indirection. */
10403 operands
[1] = rs6000_machopic_legitimize_pic_address (
10404 operands
[1], mode
, operands
[0]);
10405 if (operands
[0] != operands
[1])
10406 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10410 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
10411 emit_insn (gen_macho_low (Pmode
, operands
[0],
10412 target
, operands
[1]));
10416 emit_insn (gen_elf_high (target
, operands
[1]));
10417 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
10421 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10422 and we have put it in the TOC, we just need to make a TOC-relative
10423 reference to it. */
10425 && SYMBOL_REF_P (operands
[1])
10426 && use_toc_relative_ref (operands
[1], mode
))
10427 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
10428 else if (mode
== Pmode
10429 && CONSTANT_P (operands
[1])
10430 && GET_CODE (operands
[1]) != HIGH
10431 && ((REG_P (operands
[0])
10432 && FP_REGNO_P (REGNO (operands
[0])))
10433 || !CONST_INT_P (operands
[1])
10434 || (num_insns_constant (operands
[1], mode
)
10435 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
10436 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
10437 && (TARGET_CMODEL
== CMODEL_SMALL
10438 || can_create_pseudo_p ()
10439 || (REG_P (operands
[0])
10440 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
10444 /* Darwin uses a special PIC legitimizer. */
10445 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
10448 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
10450 if (operands
[0] != operands
[1])
10451 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10456 /* If we are to limit the number of things we put in the TOC and
10457 this is a symbol plus a constant we can add in one insn,
10458 just put the symbol in the TOC and add the constant. */
10459 if (GET_CODE (operands
[1]) == CONST
10460 && TARGET_NO_SUM_IN_TOC
10461 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
10462 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
10463 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
10464 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
10465 && ! side_effects_p (operands
[0]))
10468 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
10469 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
10471 sym
= force_reg (mode
, sym
);
10472 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
10476 operands
[1] = force_const_mem (mode
, operands
[1]);
10479 && SYMBOL_REF_P (XEXP (operands
[1], 0))
10480 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
10482 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
10484 operands
[1] = gen_const_mem (mode
, tocref
);
10485 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
10491 if (!VECTOR_MEM_VSX_P (TImode
))
10492 rs6000_eliminate_indexed_memrefs (operands
);
10496 rs6000_eliminate_indexed_memrefs (operands
);
10500 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
10503 /* Above, we may have called force_const_mem which may have returned
10504 an invalid address. If we can, fix this up; otherwise, reload will
10505 have to deal with it. */
10506 if (MEM_P (operands
[1]))
10507 operands
[1] = validize_mem (operands
[1]);
10509 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10513 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10515 init_float128_ibm (machine_mode mode
)
10517 if (!TARGET_XL_COMPAT
)
10519 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
10520 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
10521 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
10522 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
10524 if (!TARGET_HARD_FLOAT
)
10526 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
10527 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
10528 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
10529 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
10530 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
10531 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
10532 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
10533 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
10535 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
10536 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
10537 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
10538 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
10539 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
10540 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
10541 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
10542 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
10547 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
10548 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
10549 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
10550 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
10553 /* Add various conversions for IFmode to use the traditional TFmode
10555 if (mode
== IFmode
)
10557 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
10558 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
10559 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
10560 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
10561 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
10562 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
10564 if (TARGET_POWERPC64
)
10566 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
10567 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
10568 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
10569 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
10574 /* Create a decl for either complex long double multiply or complex long double
10575 divide when long double is IEEE 128-bit floating point. We can't use
10576 __multc3 and __divtc3 because the original long double using IBM extended
10577 double used those names. The complex multiply/divide functions are encoded
10578 as builtin functions with a complex result and 4 scalar inputs. */
10581 create_complex_muldiv (const char *name
, built_in_function fncode
, tree fntype
)
10583 tree fndecl
= add_builtin_function (name
, fntype
, fncode
, BUILT_IN_NORMAL
,
10586 set_builtin_decl (fncode
, fndecl
, true);
10588 if (TARGET_DEBUG_BUILTIN
)
10589 fprintf (stderr
, "create complex %s, fncode: %d\n", name
, (int) fncode
);
10594 /* Set up IEEE 128-bit floating point routines. Use different names if the
10595 arguments can be passed in a vector register. The historical PowerPC
10596 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10597 continue to use that if we aren't using vector registers to pass IEEE
10598 128-bit floating point. */
10601 init_float128_ieee (machine_mode mode
)
10603 if (FLOAT128_VECTOR_P (mode
))
10605 static bool complex_muldiv_init_p
= false;
10607 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10608 we have clone or target attributes, this will be called a second
10609 time. We want to create the built-in function only once. */
10610 if (mode
== TFmode
&& TARGET_IEEEQUAD
&& !complex_muldiv_init_p
)
10612 complex_muldiv_init_p
= true;
10613 built_in_function fncode_mul
=
10614 (built_in_function
) (BUILT_IN_COMPLEX_MUL_MIN
+ TCmode
10615 - MIN_MODE_COMPLEX_FLOAT
);
10616 built_in_function fncode_div
=
10617 (built_in_function
) (BUILT_IN_COMPLEX_DIV_MIN
+ TCmode
10618 - MIN_MODE_COMPLEX_FLOAT
);
10620 tree fntype
= build_function_type_list (complex_long_double_type_node
,
10621 long_double_type_node
,
10622 long_double_type_node
,
10623 long_double_type_node
,
10624 long_double_type_node
,
10627 create_complex_muldiv ("__mulkc3", fncode_mul
, fntype
);
10628 create_complex_muldiv ("__divkc3", fncode_div
, fntype
);
10631 set_optab_libfunc (add_optab
, mode
, "__addkf3");
10632 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
10633 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
10634 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
10635 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
10636 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
10637 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
10638 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
10640 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
10641 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
10642 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
10643 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
10644 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
10645 set_optab_libfunc (le_optab
, mode
, "__lekf2");
10646 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
10648 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
10649 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
10650 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
10651 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
10653 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
10654 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
10655 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
10657 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
10658 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
10659 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
10661 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
10662 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
10663 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
10664 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
10665 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
10666 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
10668 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
10669 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
10670 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
10671 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
10673 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
10674 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
10675 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
10676 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
10678 if (TARGET_POWERPC64
)
10680 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti");
10681 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti");
10682 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf");
10683 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf");
10689 set_optab_libfunc (add_optab
, mode
, "_q_add");
10690 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
10691 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
10692 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
10693 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
10694 if (TARGET_PPC_GPOPT
)
10695 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
10697 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
10698 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
10699 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
10700 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
10701 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
10702 set_optab_libfunc (le_optab
, mode
, "_q_fle");
10704 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
10705 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
10706 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
10707 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
10708 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
10709 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
10710 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
10711 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
10716 rs6000_init_libfuncs (void)
10718 /* __float128 support. */
10719 if (TARGET_FLOAT128_TYPE
)
10721 init_float128_ibm (IFmode
);
10722 init_float128_ieee (KFmode
);
10725 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10726 if (TARGET_LONG_DOUBLE_128
)
10728 if (!TARGET_IEEEQUAD
)
10729 init_float128_ibm (TFmode
);
10731 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10733 init_float128_ieee (TFmode
);
10737 /* Emit a potentially record-form instruction, setting DST from SRC.
10738 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10739 signed comparison of DST with zero. If DOT is 1, the generated RTL
10740 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10741 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10742 a separate COMPARE. */
10745 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
10749 emit_move_insn (dst
, src
);
10753 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
10755 emit_move_insn (dst
, src
);
10756 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
10760 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
10763 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
10764 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
10768 rtx set
= gen_rtx_SET (dst
, src
);
10769 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
10774 /* A validation routine: say whether CODE, a condition code, and MODE
10775 match. The other alternatives either don't make sense or should
10776 never be generated. */
10779 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
10781 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
10782 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
10783 && GET_MODE_CLASS (mode
) == MODE_CC
);
10785 /* These don't make sense. */
10786 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
10787 || mode
!= CCUNSmode
);
10789 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
10790 || mode
== CCUNSmode
);
10792 gcc_assert (mode
== CCFPmode
10793 || (code
!= ORDERED
&& code
!= UNORDERED
10794 && code
!= UNEQ
&& code
!= LTGT
10795 && code
!= UNGT
&& code
!= UNLT
10796 && code
!= UNGE
&& code
!= UNLE
));
10798 /* These are invalid; the information is not there. */
10799 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
10803 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10804 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10805 not zero, store there the bit offset (counted from the right) where
10806 the single stretch of 1 bits begins; and similarly for B, the bit
10807 offset where it ends. */
10810 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
10812 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
10813 unsigned HOST_WIDE_INT bit
;
10815 int n
= GET_MODE_PRECISION (mode
);
10817 if (mode
!= DImode
&& mode
!= SImode
)
10820 if (INTVAL (mask
) >= 0)
10823 ne
= exact_log2 (bit
);
10824 nb
= exact_log2 (val
+ bit
);
10826 else if (val
+ 1 == 0)
10835 nb
= exact_log2 (bit
);
10836 ne
= exact_log2 (val
+ bit
);
10841 ne
= exact_log2 (bit
);
10842 if (val
+ bit
== 0)
10850 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
10861 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10862 or rldicr instruction, to implement an AND with it in mode MODE. */
10865 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
10869 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
10872 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10874 if (mode
== DImode
)
10875 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
10877 /* For SImode, rlwinm can do everything. */
10878 if (mode
== SImode
)
10879 return (nb
< 32 && ne
< 32);
10884 /* Return the instruction template for an AND with mask in mode MODE, with
10885 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10888 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
10892 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
10893 gcc_unreachable ();
10895 if (mode
== DImode
&& ne
== 0)
10897 operands
[3] = GEN_INT (63 - nb
);
10899 return "rldicl. %0,%1,0,%3";
10900 return "rldicl %0,%1,0,%3";
10903 if (mode
== DImode
&& nb
== 63)
10905 operands
[3] = GEN_INT (63 - ne
);
10907 return "rldicr. %0,%1,0,%3";
10908 return "rldicr %0,%1,0,%3";
10911 if (nb
< 32 && ne
< 32)
10913 operands
[3] = GEN_INT (31 - nb
);
10914 operands
[4] = GEN_INT (31 - ne
);
10916 return "rlwinm. %0,%1,0,%3,%4";
10917 return "rlwinm %0,%1,0,%3,%4";
10920 gcc_unreachable ();
10923 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10924 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10925 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10928 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
10932 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
10935 int n
= GET_MODE_PRECISION (mode
);
10938 if (CONST_INT_P (XEXP (shift
, 1)))
10940 sh
= INTVAL (XEXP (shift
, 1));
10941 if (sh
< 0 || sh
>= n
)
10945 rtx_code code
= GET_CODE (shift
);
10947 /* Convert any shift by 0 to a rotate, to simplify below code. */
10951 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10952 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
10954 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
10960 /* DImode rotates need rld*. */
10961 if (mode
== DImode
&& code
== ROTATE
)
10962 return (nb
== 63 || ne
== 0 || ne
== sh
);
10964 /* SImode rotates need rlw*. */
10965 if (mode
== SImode
&& code
== ROTATE
)
10966 return (nb
< 32 && ne
< 32 && sh
< 32);
10968 /* Wrap-around masks are only okay for rotates. */
10972 /* Variable shifts are only okay for rotates. */
10976 /* Don't allow ASHIFT if the mask is wrong for that. */
10977 if (code
== ASHIFT
&& ne
< sh
)
10980 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10981 if the mask is wrong for that. */
10982 if (nb
< 32 && ne
< 32 && sh
< 32
10983 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
10986 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10987 if the mask is wrong for that. */
10988 if (code
== LSHIFTRT
)
10990 if (nb
== 63 || ne
== 0 || ne
== sh
)
10991 return !(code
== LSHIFTRT
&& nb
>= sh
);
10996 /* Return the instruction template for a shift with mask in mode MODE, with
10997 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11000 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11004 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11005 gcc_unreachable ();
11007 if (mode
== DImode
&& ne
== 0)
11009 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11010 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11011 operands
[3] = GEN_INT (63 - nb
);
11013 return "rld%I2cl. %0,%1,%2,%3";
11014 return "rld%I2cl %0,%1,%2,%3";
11017 if (mode
== DImode
&& nb
== 63)
11019 operands
[3] = GEN_INT (63 - ne
);
11021 return "rld%I2cr. %0,%1,%2,%3";
11022 return "rld%I2cr %0,%1,%2,%3";
11026 && GET_CODE (operands
[4]) != LSHIFTRT
11027 && CONST_INT_P (operands
[2])
11028 && ne
== INTVAL (operands
[2]))
11030 operands
[3] = GEN_INT (63 - nb
);
11032 return "rld%I2c. %0,%1,%2,%3";
11033 return "rld%I2c %0,%1,%2,%3";
11036 if (nb
< 32 && ne
< 32)
11038 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11039 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11040 operands
[3] = GEN_INT (31 - nb
);
11041 operands
[4] = GEN_INT (31 - ne
);
11042 /* This insn can also be a 64-bit rotate with mask that really makes
11043 it just a shift right (with mask); the %h below are to adjust for
11044 that situation (shift count is >= 32 in that case). */
11046 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11047 return "rlw%I2nm %0,%1,%h2,%3,%4";
11050 gcc_unreachable ();
11053 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11054 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11055 ASHIFT, or LSHIFTRT) in mode MODE. */
11058 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11062 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11065 int n
= GET_MODE_PRECISION (mode
);
11067 int sh
= INTVAL (XEXP (shift
, 1));
11068 if (sh
< 0 || sh
>= n
)
11071 rtx_code code
= GET_CODE (shift
);
11073 /* Convert any shift by 0 to a rotate, to simplify below code. */
11077 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11078 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11080 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11086 /* DImode rotates need rldimi. */
11087 if (mode
== DImode
&& code
== ROTATE
)
11090 /* SImode rotates need rlwimi. */
11091 if (mode
== SImode
&& code
== ROTATE
)
11092 return (nb
< 32 && ne
< 32 && sh
< 32);
11094 /* Wrap-around masks are only okay for rotates. */
11098 /* Don't allow ASHIFT if the mask is wrong for that. */
11099 if (code
== ASHIFT
&& ne
< sh
)
11102 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11103 if the mask is wrong for that. */
11104 if (nb
< 32 && ne
< 32 && sh
< 32
11105 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11108 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11109 if the mask is wrong for that. */
11110 if (code
== LSHIFTRT
)
11113 return !(code
== LSHIFTRT
&& nb
>= sh
);
11118 /* Return the instruction template for an insert with mask in mode MODE, with
11119 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11122 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11126 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11127 gcc_unreachable ();
11129 /* Prefer rldimi because rlwimi is cracked. */
11130 if (TARGET_POWERPC64
11131 && (!dot
|| mode
== DImode
)
11132 && GET_CODE (operands
[4]) != LSHIFTRT
11133 && ne
== INTVAL (operands
[2]))
11135 operands
[3] = GEN_INT (63 - nb
);
11137 return "rldimi. %0,%1,%2,%3";
11138 return "rldimi %0,%1,%2,%3";
11141 if (nb
< 32 && ne
< 32)
11143 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11144 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11145 operands
[3] = GEN_INT (31 - nb
);
11146 operands
[4] = GEN_INT (31 - ne
);
11148 return "rlwimi. %0,%1,%2,%3,%4";
11149 return "rlwimi %0,%1,%2,%3,%4";
11152 gcc_unreachable ();
11155 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11156 using two machine instructions. */
11159 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
11161 /* There are two kinds of AND we can handle with two insns:
11162 1) those we can do with two rl* insn;
11165 We do not handle that last case yet. */
11167 /* If there is just one stretch of ones, we can do it. */
11168 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
11171 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11172 one insn, we can do the whole thing with two. */
11173 unsigned HOST_WIDE_INT val
= INTVAL (c
);
11174 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11175 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11176 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11177 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11178 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
11181 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11182 If EXPAND is true, split rotate-and-mask instructions we generate to
11183 their constituent parts as well (this is used during expand); if DOT
11184 is 1, make the last insn a record-form instruction clobbering the
11185 destination GPR and setting the CC reg (from operands[3]); if 2, set
11186 that GPR as well as the CC reg. */
11189 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
11191 gcc_assert (!(expand
&& dot
));
11193 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
11195 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11196 shift right. This generates better code than doing the masks without
11197 shifts, or shifting first right and then left. */
11199 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
11201 gcc_assert (mode
== DImode
);
11203 int shift
= 63 - nb
;
11206 rtx tmp1
= gen_reg_rtx (DImode
);
11207 rtx tmp2
= gen_reg_rtx (DImode
);
11208 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
11209 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
11210 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
11214 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
11215 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
11216 emit_move_insn (operands
[0], tmp
);
11217 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
11218 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11223 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11224 that does the rest. */
11225 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11226 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11227 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11228 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11230 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
11231 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
11233 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
11235 /* Two "no-rotate"-and-mask instructions, for SImode. */
11236 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
11238 gcc_assert (mode
== SImode
);
11240 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11241 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
11242 emit_move_insn (reg
, tmp
);
11243 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11244 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11248 gcc_assert (mode
== DImode
);
11250 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11251 insns; we have to do the first in SImode, because it wraps. */
11252 if (mask2
<= 0xffffffff
11253 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
11255 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11256 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
11258 rtx reg_low
= gen_lowpart (SImode
, reg
);
11259 emit_move_insn (reg_low
, tmp
);
11260 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11261 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11265 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11266 at the top end), rotate back and clear the other hole. */
11267 int right
= exact_log2 (bit3
);
11268 int left
= 64 - right
;
11270 /* Rotate the mask too. */
11271 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
11275 rtx tmp1
= gen_reg_rtx (DImode
);
11276 rtx tmp2
= gen_reg_rtx (DImode
);
11277 rtx tmp3
= gen_reg_rtx (DImode
);
11278 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
11279 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
11280 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
11281 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
11285 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
11286 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
11287 emit_move_insn (operands
[0], tmp
);
11288 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
11289 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
11290 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11294 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11295 for lfq and stfq insns iff the registers are hard registers. */
11298 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
11300 /* We might have been passed a SUBREG. */
11301 if (!REG_P (reg1
) || !REG_P (reg2
))
11304 /* We might have been passed non floating point registers. */
11305 if (!FP_REGNO_P (REGNO (reg1
))
11306 || !FP_REGNO_P (REGNO (reg2
)))
11309 return (REGNO (reg1
) == REGNO (reg2
) - 1);
11312 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11313 addr1 and addr2 must be in consecutive memory locations
11314 (addr2 == addr1 + 8). */
11317 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
11320 unsigned int reg1
, reg2
;
11321 int offset1
, offset2
;
11323 /* The mems cannot be volatile. */
11324 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
11327 addr1
= XEXP (mem1
, 0);
11328 addr2
= XEXP (mem2
, 0);
11330 /* Extract an offset (if used) from the first addr. */
11331 if (GET_CODE (addr1
) == PLUS
)
11333 /* If not a REG, return zero. */
11334 if (!REG_P (XEXP (addr1
, 0)))
11338 reg1
= REGNO (XEXP (addr1
, 0));
11339 /* The offset must be constant! */
11340 if (!CONST_INT_P (XEXP (addr1
, 1)))
11342 offset1
= INTVAL (XEXP (addr1
, 1));
11345 else if (!REG_P (addr1
))
11349 reg1
= REGNO (addr1
);
11350 /* This was a simple (mem (reg)) expression. Offset is 0. */
11354 /* And now for the second addr. */
11355 if (GET_CODE (addr2
) == PLUS
)
11357 /* If not a REG, return zero. */
11358 if (!REG_P (XEXP (addr2
, 0)))
11362 reg2
= REGNO (XEXP (addr2
, 0));
11363 /* The offset must be constant. */
11364 if (!CONST_INT_P (XEXP (addr2
, 1)))
11366 offset2
= INTVAL (XEXP (addr2
, 1));
11369 else if (!REG_P (addr2
))
11373 reg2
= REGNO (addr2
);
11374 /* This was a simple (mem (reg)) expression. Offset is 0. */
11378 /* Both of these must have the same base register. */
11382 /* The offset for the second addr must be 8 more than the first addr. */
11383 if (offset2
!= offset1
+ 8)
11386 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11391 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
11392 need to use DDmode, in all other cases we can use the same mode. */
11393 static machine_mode
11394 rs6000_secondary_memory_needed_mode (machine_mode mode
)
11396 if (lra_in_progress
&& mode
== SDmode
)
11401 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11402 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11403 only work on the traditional altivec registers, note if an altivec register
11406 static enum rs6000_reg_type
11407 register_to_reg_type (rtx reg
, bool *is_altivec
)
11409 HOST_WIDE_INT regno
;
11410 enum reg_class rclass
;
11412 if (SUBREG_P (reg
))
11413 reg
= SUBREG_REG (reg
);
11416 return NO_REG_TYPE
;
11418 regno
= REGNO (reg
);
11419 if (!HARD_REGISTER_NUM_P (regno
))
11421 if (!lra_in_progress
&& !reload_completed
)
11422 return PSEUDO_REG_TYPE
;
11424 regno
= true_regnum (reg
);
11425 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
11426 return PSEUDO_REG_TYPE
;
11429 gcc_assert (regno
>= 0);
11431 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
11432 *is_altivec
= true;
11434 rclass
= rs6000_regno_regclass
[regno
];
11435 return reg_class_to_reg_type
[(int)rclass
];
11438 /* Helper function to return the cost of adding a TOC entry address. */
11441 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
11445 if (TARGET_CMODEL
!= CMODEL_SMALL
)
11446 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
11449 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
11454 /* Helper function for rs6000_secondary_reload to determine whether the memory
11455 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11456 needs reloading. Return negative if the memory is not handled by the memory
11457 helper functions and to try a different reload method, 0 if no additional
11458 instructions are need, and positive to give the extra cost for the
11462 rs6000_secondary_reload_memory (rtx addr
,
11463 enum reg_class rclass
,
11466 int extra_cost
= 0;
11467 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
11468 addr_mask_type addr_mask
;
11469 const char *type
= NULL
;
11470 const char *fail_msg
= NULL
;
11472 if (GPR_REG_CLASS_P (rclass
))
11473 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
11475 else if (rclass
== FLOAT_REGS
)
11476 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
11478 else if (rclass
== ALTIVEC_REGS
)
11479 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
11481 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11482 else if (rclass
== VSX_REGS
)
11483 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
11484 & ~RELOAD_REG_AND_M16
);
11486 /* If the register allocator hasn't made up its mind yet on the register
11487 class to use, settle on defaults to use. */
11488 else if (rclass
== NO_REGS
)
11490 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
11491 & ~RELOAD_REG_AND_M16
);
11493 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
11494 addr_mask
&= ~(RELOAD_REG_INDEXED
11495 | RELOAD_REG_PRE_INCDEC
11496 | RELOAD_REG_PRE_MODIFY
);
11502 /* If the register isn't valid in this register class, just return now. */
11503 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
11505 if (TARGET_DEBUG_ADDR
)
11508 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11509 "not valid in class\n",
11510 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
11517 switch (GET_CODE (addr
))
11519 /* Does the register class supports auto update forms for this mode? We
11520 don't need a scratch register, since the powerpc only supports
11521 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11524 reg
= XEXP (addr
, 0);
11525 if (!base_reg_operand (addr
, GET_MODE (reg
)))
11527 fail_msg
= "no base register #1";
11531 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
11539 reg
= XEXP (addr
, 0);
11540 plus_arg1
= XEXP (addr
, 1);
11541 if (!base_reg_operand (reg
, GET_MODE (reg
))
11542 || GET_CODE (plus_arg1
) != PLUS
11543 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
11545 fail_msg
= "bad PRE_MODIFY";
11549 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
11556 /* Do we need to simulate AND -16 to clear the bottom address bits used
11557 in VMX load/stores? Only allow the AND for vector sizes. */
11559 and_arg
= XEXP (addr
, 0);
11560 if (GET_MODE_SIZE (mode
) != 16
11561 || !CONST_INT_P (XEXP (addr
, 1))
11562 || INTVAL (XEXP (addr
, 1)) != -16)
11564 fail_msg
= "bad Altivec AND #1";
11568 if (rclass
!= ALTIVEC_REGS
)
11570 if (legitimate_indirect_address_p (and_arg
, false))
11573 else if (legitimate_indexed_address_p (and_arg
, false))
11578 fail_msg
= "bad Altivec AND #2";
11586 /* If this is an indirect address, make sure it is a base register. */
11589 if (!legitimate_indirect_address_p (addr
, false))
11596 /* If this is an indexed address, make sure the register class can handle
11597 indexed addresses for this mode. */
11599 plus_arg0
= XEXP (addr
, 0);
11600 plus_arg1
= XEXP (addr
, 1);
11602 /* (plus (plus (reg) (constant)) (constant)) is generated during
11603 push_reload processing, so handle it now. */
11604 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
11606 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11613 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11614 push_reload processing, so handle it now. */
11615 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
11617 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
11620 type
= "indexed #2";
11624 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
11626 fail_msg
= "no base register #2";
11630 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
11632 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
11633 || !legitimate_indexed_address_p (addr
, false))
11640 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
11641 && CONST_INT_P (plus_arg1
))
11643 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
11646 type
= "vector d-form offset";
11650 /* Make sure the register class can handle offset addresses. */
11651 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
11653 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11656 type
= "offset #2";
11662 fail_msg
= "bad PLUS";
11669 /* Quad offsets are restricted and can't handle normal addresses. */
11670 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
11673 type
= "vector d-form lo_sum";
11676 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
11678 fail_msg
= "bad LO_SUM";
11682 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11689 /* Static addresses need to create a TOC entry. */
11693 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
11696 type
= "vector d-form lo_sum #2";
11702 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
11706 /* TOC references look like offsetable memory. */
11708 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
11710 fail_msg
= "bad UNSPEC";
11714 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
11717 type
= "vector d-form lo_sum #3";
11720 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
11723 type
= "toc reference";
11729 fail_msg
= "bad address";
11734 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
11736 if (extra_cost
< 0)
11738 "rs6000_secondary_reload_memory error: mode = %s, "
11739 "class = %s, addr_mask = '%s', %s\n",
11740 GET_MODE_NAME (mode
),
11741 reg_class_names
[rclass
],
11742 rs6000_debug_addr_mask (addr_mask
, false),
11743 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
11747 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11748 "addr_mask = '%s', extra cost = %d, %s\n",
11749 GET_MODE_NAME (mode
),
11750 reg_class_names
[rclass
],
11751 rs6000_debug_addr_mask (addr_mask
, false),
11753 (type
) ? type
: "<none>");
11761 /* Helper function for rs6000_secondary_reload to return true if a move to a
11762 different register classe is really a simple move. */
11765 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
11766 enum rs6000_reg_type from_type
,
11769 int size
= GET_MODE_SIZE (mode
);
11771 /* Add support for various direct moves available. In this function, we only
11772 look at cases where we don't need any extra registers, and one or more
11773 simple move insns are issued. Originally small integers are not allowed
11774 in FPR/VSX registers. Single precision binary floating is not a simple
11775 move because we need to convert to the single precision memory layout.
11776 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11777 need special direct move handling, which we do not support yet. */
11778 if (TARGET_DIRECT_MOVE
11779 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
11780 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
11782 if (TARGET_POWERPC64
)
11784 /* ISA 2.07: MTVSRD or MVFVSRD. */
11788 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11789 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
11793 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11794 if (TARGET_P8_VECTOR
)
11796 if (mode
== SImode
)
11799 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
11803 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11804 if (mode
== SDmode
)
11808 /* Move to/from SPR. */
11809 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
11810 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
11811 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
11817 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11818 special direct moves that involve allocating an extra register, return the
11819 insn code of the helper function if there is such a function or
11820 CODE_FOR_nothing if not. */
11823 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
11824 enum rs6000_reg_type from_type
,
11826 secondary_reload_info
*sri
,
11830 enum insn_code icode
= CODE_FOR_nothing
;
11832 int size
= GET_MODE_SIZE (mode
);
11834 if (TARGET_POWERPC64
&& size
== 16)
11836 /* Handle moving 128-bit values from GPRs to VSX point registers on
11837 ISA 2.07 (power8, power9) when running in 64-bit mode using
11838 XXPERMDI to glue the two 64-bit values back together. */
11839 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
11841 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
11842 icode
= reg_addr
[mode
].reload_vsx_gpr
;
11845 /* Handle moving 128-bit values from VSX point registers to GPRs on
11846 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11847 bottom 64-bit value. */
11848 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
11850 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
11851 icode
= reg_addr
[mode
].reload_gpr_vsx
;
11855 else if (TARGET_POWERPC64
&& mode
== SFmode
)
11857 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
11859 cost
= 3; /* xscvdpspn, mfvsrd, and. */
11860 icode
= reg_addr
[mode
].reload_gpr_vsx
;
11863 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
11865 cost
= 2; /* mtvsrz, xscvspdpn. */
11866 icode
= reg_addr
[mode
].reload_vsx_gpr
;
11870 else if (!TARGET_POWERPC64
&& size
== 8)
11872 /* Handle moving 64-bit values from GPRs to floating point registers on
11873 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11874 32-bit values back together. Altivec register classes must be handled
11875 specially since a different instruction is used, and the secondary
11876 reload support requires a single instruction class in the scratch
11877 register constraint. However, right now TFmode is not allowed in
11878 Altivec registers, so the pattern will never match. */
11879 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
11881 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
11882 icode
= reg_addr
[mode
].reload_fpr_gpr
;
11886 if (icode
!= CODE_FOR_nothing
)
11891 sri
->icode
= icode
;
11892 sri
->extra_cost
= cost
;
11899 /* Return whether a move between two register classes can be done either
11900 directly (simple move) or via a pattern that uses a single extra temporary
11901 (using ISA 2.07's direct move in this case. */
11904 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
11905 enum rs6000_reg_type from_type
,
11907 secondary_reload_info
*sri
,
11910 /* Fall back to load/store reloads if either type is not a register. */
11911 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
11914 /* If we haven't allocated registers yet, assume the move can be done for the
11915 standard register types. */
11916 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
11917 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
11918 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
11921 /* Moves to the same set of registers is a simple move for non-specialized
11923 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
11926 /* Check whether a simple move can be done directly. */
11927 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
11931 sri
->icode
= CODE_FOR_nothing
;
11932 sri
->extra_cost
= 0;
11937 /* Now check if we can do it in a few steps. */
11938 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
11942 /* Inform reload about cases where moving X with a mode MODE to a register in
11943 RCLASS requires an extra scratch or immediate register. Return the class
11944 needed for the immediate register.
11946 For VSX and Altivec, we may need a register to convert sp+offset into
11949 For misaligned 64-bit gpr loads and stores we need a register to
11950 convert an offset address to indirect. */
11953 rs6000_secondary_reload (bool in_p
,
11955 reg_class_t rclass_i
,
11957 secondary_reload_info
*sri
)
11959 enum reg_class rclass
= (enum reg_class
) rclass_i
;
11960 reg_class_t ret
= ALL_REGS
;
11961 enum insn_code icode
;
11962 bool default_p
= false;
11963 bool done_p
= false;
11965 /* Allow subreg of memory before/during reload. */
11966 bool memory_p
= (MEM_P (x
)
11967 || (!reload_completed
&& SUBREG_P (x
)
11968 && MEM_P (SUBREG_REG (x
))));
11970 sri
->icode
= CODE_FOR_nothing
;
11971 sri
->t_icode
= CODE_FOR_nothing
;
11972 sri
->extra_cost
= 0;
11974 ? reg_addr
[mode
].reload_load
11975 : reg_addr
[mode
].reload_store
);
11977 if (REG_P (x
) || register_operand (x
, mode
))
11979 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
11980 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
11981 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
11984 std::swap (to_type
, from_type
);
11986 /* Can we do a direct move of some sort? */
11987 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
11990 icode
= (enum insn_code
)sri
->icode
;
11997 /* Make sure 0.0 is not reloaded or forced into memory. */
11998 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12005 /* If this is a scalar floating point value and we want to load it into the
12006 traditional Altivec registers, do it via a move via a traditional floating
12007 point register, unless we have D-form addressing. Also make sure that
12008 non-zero constants use a FPR. */
12009 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12010 && !mode_supports_vmx_dform (mode
)
12011 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12012 && (memory_p
|| CONST_DOUBLE_P (x
)))
12019 /* Handle reload of load/stores if we have reload helper functions. */
12020 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12022 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12025 if (extra_cost
>= 0)
12029 if (extra_cost
> 0)
12031 sri
->extra_cost
= extra_cost
;
12032 sri
->icode
= icode
;
12037 /* Handle unaligned loads and stores of integer registers. */
12038 if (!done_p
&& TARGET_POWERPC64
12039 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12041 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12043 rtx addr
= XEXP (x
, 0);
12044 rtx off
= address_offset (addr
);
12046 if (off
!= NULL_RTX
)
12048 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12049 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12051 /* We need a secondary reload when our legitimate_address_p
12052 says the address is good (as otherwise the entire address
12053 will be reloaded), and the offset is not a multiple of
12054 four or we have an address wrap. Address wrap will only
12055 occur for LO_SUMs since legitimate_offset_address_p
12056 rejects addresses for 16-byte mems that will wrap. */
12057 if (GET_CODE (addr
) == LO_SUM
12058 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12059 && ((offset
& 3) != 0
12060 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12061 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12062 && (offset
& 3) != 0))
12064 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12066 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12067 : CODE_FOR_reload_di_load
);
12069 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12070 : CODE_FOR_reload_di_store
);
12071 sri
->extra_cost
= 2;
12082 if (!done_p
&& !TARGET_POWERPC64
12083 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12085 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12087 rtx addr
= XEXP (x
, 0);
12088 rtx off
= address_offset (addr
);
12090 if (off
!= NULL_RTX
)
12092 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12093 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12095 /* We need a secondary reload when our legitimate_address_p
12096 says the address is good (as otherwise the entire address
12097 will be reloaded), and we have a wrap.
12099 legitimate_lo_sum_address_p allows LO_SUM addresses to
12100 have any offset so test for wrap in the low 16 bits.
12102 legitimate_offset_address_p checks for the range
12103 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12104 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12105 [0x7ff4,0x7fff] respectively, so test for the
12106 intersection of these ranges, [0x7ffc,0x7fff] and
12107 [0x7ff4,0x7ff7] respectively.
12109 Note that the address we see here may have been
12110 manipulated by legitimize_reload_address. */
12111 if (GET_CODE (addr
) == LO_SUM
12112 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12113 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12116 sri
->icode
= CODE_FOR_reload_si_load
;
12118 sri
->icode
= CODE_FOR_reload_si_store
;
12119 sri
->extra_cost
= 2;
12134 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12136 gcc_assert (ret
!= ALL_REGS
);
12138 if (TARGET_DEBUG_ADDR
)
12141 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12143 reg_class_names
[ret
],
12144 in_p
? "true" : "false",
12145 reg_class_names
[rclass
],
12146 GET_MODE_NAME (mode
));
12148 if (reload_completed
)
12149 fputs (", after reload", stderr
);
12152 fputs (", done_p not set", stderr
);
12155 fputs (", default secondary reload", stderr
);
12157 if (sri
->icode
!= CODE_FOR_nothing
)
12158 fprintf (stderr
, ", reload func = %s, extra cost = %d",
12159 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
12161 else if (sri
->extra_cost
> 0)
12162 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
12164 fputs ("\n", stderr
);
12171 /* Better tracing for rs6000_secondary_reload_inner. */
12174 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
12179 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
12181 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
12182 store_p
? "store" : "load");
12185 set
= gen_rtx_SET (mem
, reg
);
12187 set
= gen_rtx_SET (reg
, mem
);
12189 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
12190 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
12193 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
12194 ATTRIBUTE_NORETURN
;
12197 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
12200 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
12201 gcc_unreachable ();
12204 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12205 reload helper functions. These were identified in
12206 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12207 reload, it calls the insns:
12208 reload_<RELOAD:mode>_<P:mptrsize>_store
12209 reload_<RELOAD:mode>_<P:mptrsize>_load
12211 which in turn calls this function, to do whatever is necessary to create
12212 valid addresses. */
12215 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12217 int regno
= true_regnum (reg
);
12218 machine_mode mode
= GET_MODE (reg
);
12219 addr_mask_type addr_mask
;
12222 rtx op_reg
, op0
, op1
;
12227 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
12228 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
12229 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12231 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
12232 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12234 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
12235 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12237 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
12238 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12241 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12243 /* Make sure the mode is valid in this register class. */
12244 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12245 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12247 if (TARGET_DEBUG_ADDR
)
12248 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
12250 new_addr
= addr
= XEXP (mem
, 0);
12251 switch (GET_CODE (addr
))
12253 /* Does the register class support auto update forms for this mode? If
12254 not, do the update now. We don't need a scratch register, since the
12255 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12258 op_reg
= XEXP (addr
, 0);
12259 if (!base_reg_operand (op_reg
, Pmode
))
12260 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12262 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12264 int delta
= GET_MODE_SIZE (mode
);
12265 if (GET_CODE (addr
) == PRE_DEC
)
12267 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
12273 op0
= XEXP (addr
, 0);
12274 op1
= XEXP (addr
, 1);
12275 if (!base_reg_operand (op0
, Pmode
)
12276 || GET_CODE (op1
) != PLUS
12277 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
12278 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12280 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12282 emit_insn (gen_rtx_SET (op0
, op1
));
12287 /* Do we need to simulate AND -16 to clear the bottom address bits used
12288 in VMX load/stores? */
12290 op0
= XEXP (addr
, 0);
12291 op1
= XEXP (addr
, 1);
12292 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
12294 if (REG_P (op0
) || SUBREG_P (op0
))
12297 else if (GET_CODE (op1
) == PLUS
)
12299 emit_insn (gen_rtx_SET (scratch
, op1
));
12304 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12306 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
12307 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
12308 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
12309 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
12310 new_addr
= scratch
;
12314 /* If this is an indirect address, make sure it is a base register. */
12317 if (!base_reg_operand (addr
, GET_MODE (addr
)))
12319 emit_insn (gen_rtx_SET (scratch
, addr
));
12320 new_addr
= scratch
;
12324 /* If this is an indexed address, make sure the register class can handle
12325 indexed addresses for this mode. */
12327 op0
= XEXP (addr
, 0);
12328 op1
= XEXP (addr
, 1);
12329 if (!base_reg_operand (op0
, Pmode
))
12330 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12332 else if (int_reg_operand (op1
, Pmode
))
12334 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12336 emit_insn (gen_rtx_SET (scratch
, addr
));
12337 new_addr
= scratch
;
12341 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
12343 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
12344 || !quad_address_p (addr
, mode
, false))
12346 emit_insn (gen_rtx_SET (scratch
, addr
));
12347 new_addr
= scratch
;
12351 /* Make sure the register class can handle offset addresses. */
12352 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12354 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12356 emit_insn (gen_rtx_SET (scratch
, addr
));
12357 new_addr
= scratch
;
12362 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12367 op0
= XEXP (addr
, 0);
12368 op1
= XEXP (addr
, 1);
12369 if (!base_reg_operand (op0
, Pmode
))
12370 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12372 else if (int_reg_operand (op1
, Pmode
))
12374 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12376 emit_insn (gen_rtx_SET (scratch
, addr
));
12377 new_addr
= scratch
;
12381 /* Quad offsets are restricted and can't handle normal addresses. */
12382 else if (mode_supports_dq_form (mode
))
12384 emit_insn (gen_rtx_SET (scratch
, addr
));
12385 new_addr
= scratch
;
12388 /* Make sure the register class can handle offset addresses. */
12389 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
12391 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12393 emit_insn (gen_rtx_SET (scratch
, addr
));
12394 new_addr
= scratch
;
12399 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12406 rs6000_emit_move (scratch
, addr
, Pmode
);
12407 new_addr
= scratch
;
12411 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12414 /* Adjust the address if it changed. */
12415 if (addr
!= new_addr
)
12417 mem
= replace_equiv_address_nv (mem
, new_addr
);
12418 if (TARGET_DEBUG_ADDR
)
12419 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12422 /* Now create the move. */
12424 emit_insn (gen_rtx_SET (mem
, reg
));
12426 emit_insn (gen_rtx_SET (reg
, mem
));
12431 /* Convert reloads involving 64-bit gprs and misaligned offset
12432 addressing, or multiple 32-bit gprs and offsets that are too large,
12433 to use indirect addressing. */
12436 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12438 int regno
= true_regnum (reg
);
12439 enum reg_class rclass
;
12441 rtx scratch_or_premodify
= scratch
;
12443 if (TARGET_DEBUG_ADDR
)
12445 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
12446 store_p
? "store" : "load");
12447 fprintf (stderr
, "reg:\n");
12449 fprintf (stderr
, "mem:\n");
12451 fprintf (stderr
, "scratch:\n");
12452 debug_rtx (scratch
);
12455 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
12456 gcc_assert (MEM_P (mem
));
12457 rclass
= REGNO_REG_CLASS (regno
);
12458 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
12459 addr
= XEXP (mem
, 0);
12461 if (GET_CODE (addr
) == PRE_MODIFY
)
12463 gcc_assert (REG_P (XEXP (addr
, 0))
12464 && GET_CODE (XEXP (addr
, 1)) == PLUS
12465 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
12466 scratch_or_premodify
= XEXP (addr
, 0);
12467 addr
= XEXP (addr
, 1);
12469 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
12471 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
12473 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
12475 /* Now create the move. */
12477 emit_insn (gen_rtx_SET (mem
, reg
));
12479 emit_insn (gen_rtx_SET (reg
, mem
));
12484 /* Given an rtx X being reloaded into a reg required to be
12485 in class CLASS, return the class of reg to actually use.
12486 In general this is just CLASS; but on some machines
12487 in some cases it is preferable to use a more restrictive class.
12489 On the RS/6000, we have to return NO_REGS when we want to reload a
12490 floating-point CONST_DOUBLE to force it to be copied to memory.
12492 We also don't want to reload integer values into floating-point
12493 registers if we can at all help it. In fact, this can
12494 cause reload to die, if it tries to generate a reload of CTR
12495 into a FP register and discovers it doesn't have the memory location
12498 ??? Would it be a good idea to have reload do the converse, that is
12499 try to reload floating modes into FP registers if possible?
12502 static enum reg_class
12503 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
12505 machine_mode mode
= GET_MODE (x
);
12506 bool is_constant
= CONSTANT_P (x
);
12508 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12509 reload class for it. */
12510 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
12511 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
12514 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
12515 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
12518 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12519 the reloading of address expressions using PLUS into floating point
12521 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
12525 /* Zero is always allowed in all VSX registers. */
12526 if (x
== CONST0_RTX (mode
))
12529 /* If this is a vector constant that can be formed with a few Altivec
12530 instructions, we want altivec registers. */
12531 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
12532 return ALTIVEC_REGS
;
12534 /* If this is an integer constant that can easily be loaded into
12535 vector registers, allow it. */
12536 if (CONST_INT_P (x
))
12538 HOST_WIDE_INT value
= INTVAL (x
);
12540 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12541 2.06 can generate it in the Altivec registers with
12545 if (TARGET_P8_VECTOR
)
12547 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
12548 return ALTIVEC_REGS
;
12553 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12554 a sign extend in the Altivec registers. */
12555 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
12556 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
12557 return ALTIVEC_REGS
;
12560 /* Force constant to memory. */
12564 /* D-form addressing can easily reload the value. */
12565 if (mode_supports_vmx_dform (mode
)
12566 || mode_supports_dq_form (mode
))
12569 /* If this is a scalar floating point value and we don't have D-form
12570 addressing, prefer the traditional floating point registers so that we
12571 can use D-form (register+offset) addressing. */
12572 if (rclass
== VSX_REGS
12573 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
12576 /* Prefer the Altivec registers if Altivec is handling the vector
12577 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12579 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
12580 || mode
== V1TImode
)
12581 return ALTIVEC_REGS
;
12586 if (is_constant
|| GET_CODE (x
) == PLUS
)
12588 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
12589 return GENERAL_REGS
;
12590 if (reg_class_subset_p (BASE_REGS
, rclass
))
12595 /* For the vector pair and vector quad modes, prefer their natural register
12596 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12597 the GPR registers. */
12598 if (rclass
== GEN_OR_FLOAT_REGS
)
12600 if (mode
== OOmode
)
12603 if (mode
== XOmode
)
12606 if (GET_MODE_CLASS (mode
) == MODE_INT
)
12607 return GENERAL_REGS
;
12613 /* Debug version of rs6000_preferred_reload_class. */
12614 static enum reg_class
12615 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
12617 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
12620 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12622 reg_class_names
[ret
], reg_class_names
[rclass
],
12623 GET_MODE_NAME (GET_MODE (x
)));
12629 /* If we are copying between FP or AltiVec registers and anything else, we need
12630 a memory location. The exception is when we are targeting ppc64 and the
12631 move to/from fpr to gpr instructions are available. Also, under VSX, you
12632 can copy vector registers from the FP register set to the Altivec register
12633 set and vice versa. */
12636 rs6000_secondary_memory_needed (machine_mode mode
,
12637 reg_class_t from_class
,
12638 reg_class_t to_class
)
12640 enum rs6000_reg_type from_type
, to_type
;
12641 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
12642 || (to_class
== ALTIVEC_REGS
));
12644 /* If a simple/direct move is available, we don't need secondary memory */
12645 from_type
= reg_class_to_reg_type
[(int)from_class
];
12646 to_type
= reg_class_to_reg_type
[(int)to_class
];
12648 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
12649 (secondary_reload_info
*)0, altivec_p
))
12652 /* If we have a floating point or vector register class, we need to use
12653 memory to transfer the data. */
12654 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
12660 /* Debug version of rs6000_secondary_memory_needed. */
12662 rs6000_debug_secondary_memory_needed (machine_mode mode
,
12663 reg_class_t from_class
,
12664 reg_class_t to_class
)
12666 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
12669 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12670 "to_class = %s, mode = %s\n",
12671 ret
? "true" : "false",
12672 reg_class_names
[from_class
],
12673 reg_class_names
[to_class
],
12674 GET_MODE_NAME (mode
));
12679 /* Return the register class of a scratch register needed to copy IN into
12680 or out of a register in RCLASS in MODE. If it can be done directly,
12681 NO_REGS is returned. */
12683 static enum reg_class
12684 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
12689 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
12691 && MACHOPIC_INDIRECT
12695 /* We cannot copy a symbolic operand directly into anything
12696 other than BASE_REGS for TARGET_ELF. So indicate that a
12697 register from BASE_REGS is needed as an intermediate
12700 On Darwin, pic addresses require a load from memory, which
12701 needs a base register. */
12702 if (rclass
!= BASE_REGS
12703 && (SYMBOL_REF_P (in
)
12704 || GET_CODE (in
) == HIGH
12705 || GET_CODE (in
) == LABEL_REF
12706 || GET_CODE (in
) == CONST
))
12712 regno
= REGNO (in
);
12713 if (!HARD_REGISTER_NUM_P (regno
))
12715 regno
= true_regnum (in
);
12716 if (!HARD_REGISTER_NUM_P (regno
))
12720 else if (SUBREG_P (in
))
12722 regno
= true_regnum (in
);
12723 if (!HARD_REGISTER_NUM_P (regno
))
12729 /* If we have VSX register moves, prefer moving scalar values between
12730 Altivec registers and GPR by going via an FPR (and then via memory)
12731 instead of reloading the secondary memory address for Altivec moves. */
12733 && GET_MODE_SIZE (mode
) < 16
12734 && !mode_supports_vmx_dform (mode
)
12735 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
12736 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
12737 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12738 && (regno
>= 0 && INT_REGNO_P (regno
)))))
12741 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12743 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
12744 || (regno
>= 0 && INT_REGNO_P (regno
)))
12747 /* Constants, memory, and VSX registers can go into VSX registers (both the
12748 traditional floating point and the altivec registers). */
12749 if (rclass
== VSX_REGS
12750 && (regno
== -1 || VSX_REGNO_P (regno
)))
12753 /* Constants, memory, and FP registers can go into FP registers. */
12754 if ((regno
== -1 || FP_REGNO_P (regno
))
12755 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
12756 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
12758 /* Memory, and AltiVec registers can go into AltiVec registers. */
12759 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
12760 && rclass
== ALTIVEC_REGS
)
12763 /* We can copy among the CR registers. */
12764 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
12765 && regno
>= 0 && CR_REGNO_P (regno
))
12768 /* Otherwise, we need GENERAL_REGS. */
12769 return GENERAL_REGS
;
12772 /* Debug version of rs6000_secondary_reload_class. */
12773 static enum reg_class
12774 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
12775 machine_mode mode
, rtx in
)
12777 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
12779 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12780 "mode = %s, input rtx:\n",
12781 reg_class_names
[ret
], reg_class_names
[rclass
],
12782 GET_MODE_NAME (mode
));
12788 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12791 rs6000_can_change_mode_class (machine_mode from
,
12793 reg_class_t rclass
)
12795 unsigned from_size
= GET_MODE_SIZE (from
);
12796 unsigned to_size
= GET_MODE_SIZE (to
);
12798 if (from_size
!= to_size
)
12800 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
12802 if (reg_classes_intersect_p (xclass
, rclass
))
12804 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
12805 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
12806 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
12807 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
12809 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12810 single register under VSX because the scalar part of the register
12811 is in the upper 64-bits, and not the lower 64-bits. Types like
12812 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
12813 IEEE floating point can't overlap, and neither can small
12816 if (to_float128_vector_p
&& from_float128_vector_p
)
12819 else if (to_float128_vector_p
|| from_float128_vector_p
)
12822 /* TDmode in floating-mode registers must always go into a register
12823 pair with the most significant word in the even-numbered register
12824 to match ISA requirements. In little-endian mode, this does not
12825 match subreg numbering, so we cannot allow subregs. */
12826 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
12829 /* Allow SD<->DD changes, since SDmode values are stored in
12830 the low half of the DDmode, just like target-independent
12831 code expects. We need to allow at least SD->DD since
12832 rs6000_secondary_memory_needed_mode asks for that change
12833 to be made for SD reloads. */
12834 if ((to
== DDmode
&& from
== SDmode
)
12835 || (to
== SDmode
&& from
== DDmode
))
12838 if (from_size
< 8 || to_size
< 8)
12841 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
12844 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
12853 /* Since the VSX register set includes traditional floating point registers
12854 and altivec registers, just check for the size being different instead of
12855 trying to check whether the modes are vector modes. Otherwise it won't
12856 allow say DF and DI to change classes. For types like TFmode and TDmode
12857 that take 2 64-bit registers, rather than a single 128-bit register, don't
12858 allow subregs of those types to other 128 bit types. */
12859 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
12861 unsigned num_regs
= (from_size
+ 15) / 16;
12862 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
12863 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
12866 return (from_size
== 8 || from_size
== 16);
12869 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
12870 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
12876 /* Debug version of rs6000_can_change_mode_class. */
12878 rs6000_debug_can_change_mode_class (machine_mode from
,
12880 reg_class_t rclass
)
12882 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
12885 "rs6000_can_change_mode_class, return %s, from = %s, "
12886 "to = %s, rclass = %s\n",
12887 ret
? "true" : "false",
12888 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
12889 reg_class_names
[rclass
]);
12894 /* Return a string to do a move operation of 128 bits of data. */
12897 rs6000_output_move_128bit (rtx operands
[])
12899 rtx dest
= operands
[0];
12900 rtx src
= operands
[1];
12901 machine_mode mode
= GET_MODE (dest
);
12904 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
12905 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
12909 dest_regno
= REGNO (dest
);
12910 dest_gpr_p
= INT_REGNO_P (dest_regno
);
12911 dest_fp_p
= FP_REGNO_P (dest_regno
);
12912 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
12913 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
12918 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
12923 src_regno
= REGNO (src
);
12924 src_gpr_p
= INT_REGNO_P (src_regno
);
12925 src_fp_p
= FP_REGNO_P (src_regno
);
12926 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
12927 src_vsx_p
= src_fp_p
| src_vmx_p
;
12932 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
12935 /* Register moves. */
12936 if (dest_regno
>= 0 && src_regno
>= 0)
12943 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
12944 return (WORDS_BIG_ENDIAN
12945 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12946 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12948 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
12952 else if (TARGET_VSX
&& dest_vsx_p
)
12955 return "xxlor %x0,%x1,%x1";
12957 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
12958 return (WORDS_BIG_ENDIAN
12959 ? "mtvsrdd %x0,%1,%L1"
12960 : "mtvsrdd %x0,%L1,%1");
12962 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
12966 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
12967 return "vor %0,%1,%1";
12969 else if (dest_fp_p
&& src_fp_p
)
12974 else if (dest_regno
>= 0 && MEM_P (src
))
12978 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
12984 else if (TARGET_ALTIVEC
&& dest_vmx_p
12985 && altivec_indexed_or_indirect_operand (src
, mode
))
12986 return "lvx %0,%y1";
12988 else if (TARGET_VSX
&& dest_vsx_p
)
12990 if (mode_supports_dq_form (mode
)
12991 && quad_address_p (XEXP (src
, 0), mode
, true))
12992 return "lxv %x0,%1";
12994 else if (TARGET_P9_VECTOR
)
12995 return "lxvx %x0,%y1";
12997 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
12998 return "lxvw4x %x0,%y1";
13001 return "lxvd2x %x0,%y1";
13004 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13005 return "lvx %0,%y1";
13007 else if (dest_fp_p
)
13012 else if (src_regno
>= 0 && MEM_P (dest
))
13016 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13017 return "stq %1,%0";
13022 else if (TARGET_ALTIVEC
&& src_vmx_p
13023 && altivec_indexed_or_indirect_operand (dest
, mode
))
13024 return "stvx %1,%y0";
13026 else if (TARGET_VSX
&& src_vsx_p
)
13028 if (mode_supports_dq_form (mode
)
13029 && quad_address_p (XEXP (dest
, 0), mode
, true))
13030 return "stxv %x1,%0";
13032 else if (TARGET_P9_VECTOR
)
13033 return "stxvx %x1,%y0";
13035 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13036 return "stxvw4x %x1,%y0";
13039 return "stxvd2x %x1,%y0";
13042 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13043 return "stvx %1,%y0";
13050 else if (dest_regno
>= 0
13051 && (CONST_INT_P (src
)
13052 || CONST_WIDE_INT_P (src
)
13053 || CONST_DOUBLE_P (src
)
13054 || GET_CODE (src
) == CONST_VECTOR
))
13059 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13060 || (dest_vsx_p
&& TARGET_VSX
))
13061 return output_vec_const_move (operands
);
13064 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
13067 /* Validate a 128-bit move. */
13069 rs6000_move_128bit_ok_p (rtx operands
[])
13071 machine_mode mode
= GET_MODE (operands
[0]);
13072 return (gpc_reg_operand (operands
[0], mode
)
13073 || gpc_reg_operand (operands
[1], mode
));
13076 /* Return true if a 128-bit move needs to be split. */
13078 rs6000_split_128bit_ok_p (rtx operands
[])
13080 if (!reload_completed
)
13083 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13086 if (quad_load_store_p (operands
[0], operands
[1]))
13093 /* Given a comparison operation, return the bit number in CCR to test. We
13094 know this is a valid comparison.
13096 SCC_P is 1 if this is for an scc. That means that %D will have been
13097 used instead of %C, so the bits will be in different places.
13099 Return -1 if OP isn't a valid comparison for some reason. */
13102 ccr_bit (rtx op
, int scc_p
)
13104 enum rtx_code code
= GET_CODE (op
);
13105 machine_mode cc_mode
;
13110 if (!COMPARISON_P (op
))
13113 reg
= XEXP (op
, 0);
13115 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13118 cc_mode
= GET_MODE (reg
);
13119 cc_regnum
= REGNO (reg
);
13120 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13122 validate_condition_mode (code
, cc_mode
);
13124 /* When generating a sCOND operation, only positive conditions are
13143 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13145 return base_bit
+ 2;
13146 case GT
: case GTU
: case UNLE
:
13147 return base_bit
+ 1;
13148 case LT
: case LTU
: case UNGE
:
13150 case ORDERED
: case UNORDERED
:
13151 return base_bit
+ 3;
13154 /* If scc, we will have done a cror to put the bit in the
13155 unordered position. So test that bit. For integer, this is ! LT
13156 unless this is an scc insn. */
13157 return scc_p
? base_bit
+ 3 : base_bit
;
13160 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
13167 /* Return the GOT register. */
13170 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
13172 /* The second flow pass currently (June 1999) can't update
13173 regs_ever_live without disturbing other parts of the compiler, so
13174 update it here to make the prolog/epilogue code happy. */
13175 if (!can_create_pseudo_p ()
13176 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
13177 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
13179 crtl
->uses_pic_offset_table
= 1;
13181 return pic_offset_table_rtx
;
13184 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13186 /* Write out a function code label. */
13189 rs6000_output_function_entry (FILE *file
, const char *fname
)
13191 if (fname
[0] != '.')
13193 switch (DEFAULT_ABI
)
13196 gcc_unreachable ();
13202 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
13212 RS6000_OUTPUT_BASENAME (file
, fname
);
/* Print an operand.  Recognize special options, documented below.  */

/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
   only introduced by the linker, when applying the sda21
   relocation.  */
#if TARGET_ELF
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif
13229 print_operand (FILE *file
, rtx x
, int code
)
13232 unsigned HOST_WIDE_INT uval
;
13236 /* %a is output_address. */
13238 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13242 /* Write the MMA accumulator number associated with VSX register X. */
13243 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
13244 output_operand_lossage ("invalid %%A value");
13246 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
13250 /* Like 'J' but get to the GT bit only. */
13251 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13253 output_operand_lossage ("invalid %%D value");
13257 /* Bit 1 is GT bit. */
13258 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
13260 /* Add one for shift count in rlinm for scc. */
13261 fprintf (file
, "%d", i
+ 1);
13265 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13268 output_operand_lossage ("invalid %%e value");
13273 if ((uval
& 0xffff) == 0 && uval
!= 0)
13278 /* X is a CR register. Print the number of the EQ bit of the CR */
13279 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13280 output_operand_lossage ("invalid %%E value");
13282 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
13286 /* X is a CR register. Print the shift count needed to move it
13287 to the high-order four bits. */
13288 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13289 output_operand_lossage ("invalid %%f value");
13291 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
13295 /* Similar, but print the count for the rotate in the opposite
13297 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13298 output_operand_lossage ("invalid %%F value");
13300 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
13304 /* X is a constant integer. If it is negative, print "m",
13305 otherwise print "z". This is to make an aze or ame insn. */
13306 if (!CONST_INT_P (x
))
13307 output_operand_lossage ("invalid %%G value");
13308 else if (INTVAL (x
) >= 0)
13315 /* If constant, output low-order five bits. Otherwise, write
13318 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
13320 print_operand (file
, x
, 0);
13324 /* If constant, output low-order six bits. Otherwise, write
13327 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
13329 print_operand (file
, x
, 0);
13333 /* Print `i' if this is a constant, else nothing. */
13339 /* Write the bit number in CCR for jump. */
13340 i
= ccr_bit (x
, 0);
13342 output_operand_lossage ("invalid %%j code");
13344 fprintf (file
, "%d", i
);
13348 /* Similar, but add one for shift count in rlinm for scc and pass
13349 scc flag to `ccr_bit'. */
13350 i
= ccr_bit (x
, 1);
13352 output_operand_lossage ("invalid %%J code");
13354 /* If we want bit 31, write a shift count of zero, not 32. */
13355 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
13359 /* X must be a constant. Write the 1's complement of the
13362 output_operand_lossage ("invalid %%k value");
13364 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
13368 /* X must be a symbolic constant on ELF. Write an
13369 expression suitable for an 'addi' that adds in the low 16
13370 bits of the MEM. */
13371 if (GET_CODE (x
) == CONST
)
13373 if (GET_CODE (XEXP (x
, 0)) != PLUS
13374 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
13375 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
13376 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
13377 output_operand_lossage ("invalid %%K value");
13379 print_operand_address (file
, x
);
13380 fputs ("@l", file
);
13383 /* %l is output_asm_label. */
13386 /* Write second word of DImode or DFmode reference. Works on register
13387 or non-indexed memory only. */
13389 fputs (reg_names
[REGNO (x
) + 1], file
);
13390 else if (MEM_P (x
))
13392 machine_mode mode
= GET_MODE (x
);
13393 /* Handle possible auto-increment. Since it is pre-increment and
13394 we have already done it, we can just use an offset of word. */
13395 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13396 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13397 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13399 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13400 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13403 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
13407 if (small_data_operand (x
, GET_MODE (x
)))
13408 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13409 reg_names
[SMALL_DATA_REG
]);
13413 case 'N': /* Unused */
13414 /* Write the number of elements in the vector times 4. */
13415 if (GET_CODE (x
) != PARALLEL
)
13416 output_operand_lossage ("invalid %%N value");
13418 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
13421 case 'O': /* Unused */
13422 /* Similar, but subtract 1 first. */
13423 if (GET_CODE (x
) != PARALLEL
)
13424 output_operand_lossage ("invalid %%O value");
13426 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
13430 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13433 || (i
= exact_log2 (INTVAL (x
))) < 0)
13434 output_operand_lossage ("invalid %%p value");
13436 fprintf (file
, "%d", i
);
13440 /* The operand must be an indirect memory reference. The result
13441 is the register name. */
13442 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
13443 || REGNO (XEXP (x
, 0)) >= 32)
13444 output_operand_lossage ("invalid %%P value");
13446 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
13450 /* This outputs the logical code corresponding to a boolean
13451 expression. The expression may have one or both operands
13452 negated (if one, only the first one). For condition register
13453 logical operations, it will also treat the negated
13454 CR codes as NOTs, but not handle NOTs of them. */
13456 const char *const *t
= 0;
13458 enum rtx_code code
= GET_CODE (x
);
13459 static const char * const tbl
[3][3] = {
13460 { "and", "andc", "nor" },
13461 { "or", "orc", "nand" },
13462 { "xor", "eqv", "xor" } };
13466 else if (code
== IOR
)
13468 else if (code
== XOR
)
13471 output_operand_lossage ("invalid %%q value");
13473 if (GET_CODE (XEXP (x
, 0)) != NOT
)
13477 if (GET_CODE (XEXP (x
, 1)) == NOT
)
13488 if (! TARGET_MFCRF
)
13494 /* X is a CR register. Print the mask for `mtcrf'. */
13495 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13496 output_operand_lossage ("invalid %%R value");
13498 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
13502 /* Low 5 bits of 32 - value */
13504 output_operand_lossage ("invalid %%s value");
13506 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
13510 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13511 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13513 output_operand_lossage ("invalid %%t value");
13517 /* Bit 3 is OV bit. */
13518 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
13520 /* If we want bit 31, write a shift count of zero, not 32. */
13521 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
13525 /* Print the symbolic name of a branch target register. */
13526 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
13527 x
= XVECEXP (x
, 0, 0);
13528 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
13529 && REGNO (x
) != CTR_REGNO
))
13530 output_operand_lossage ("invalid %%T value");
13531 else if (REGNO (x
) == LR_REGNO
)
13532 fputs ("lr", file
);
13534 fputs ("ctr", file
);
13538 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13539 for use in unsigned operand. */
13542 output_operand_lossage ("invalid %%u value");
13547 if ((uval
& 0xffff) == 0)
13550 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
13554 /* High-order 16 bits of constant for use in signed operand. */
13556 output_operand_lossage ("invalid %%v value");
13558 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
13559 (INTVAL (x
) >> 16) & 0xffff);
13563 /* Print `u' if this has an auto-increment or auto-decrement. */
13565 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
13566 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
13567 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
13572 /* Print the trap code for this operand. */
13573 switch (GET_CODE (x
))
13576 fputs ("eq", file
); /* 4 */
13579 fputs ("ne", file
); /* 24 */
13582 fputs ("lt", file
); /* 16 */
13585 fputs ("le", file
); /* 20 */
13588 fputs ("gt", file
); /* 8 */
13591 fputs ("ge", file
); /* 12 */
13594 fputs ("llt", file
); /* 2 */
13597 fputs ("lle", file
); /* 6 */
13600 fputs ("lgt", file
); /* 1 */
13603 fputs ("lge", file
); /* 5 */
13606 output_operand_lossage ("invalid %%V value");
13611 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13614 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
13615 ((INTVAL (x
) & 0xffff) ^ 0x8000) - 0x8000);
13617 print_operand (file
, x
, 0);
13621 /* X is a FPR or Altivec register used in a VSX context. */
13622 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
13623 output_operand_lossage ("invalid %%x value");
13626 int reg
= REGNO (x
);
13627 int vsx_reg
= (FP_REGNO_P (reg
)
13629 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
13631 #ifdef TARGET_REGNAMES
13632 if (TARGET_REGNAMES
)
13633 fprintf (file
, "%%vs%d", vsx_reg
);
13636 fprintf (file
, "%d", vsx_reg
);
13642 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
13643 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
13644 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
13649 /* Like 'L', for third word of TImode/PTImode */
13651 fputs (reg_names
[REGNO (x
) + 2], file
);
13652 else if (MEM_P (x
))
13654 machine_mode mode
= GET_MODE (x
);
13655 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13656 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13657 output_address (mode
, plus_constant (Pmode
,
13658 XEXP (XEXP (x
, 0), 0), 8));
13659 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13660 output_address (mode
, plus_constant (Pmode
,
13661 XEXP (XEXP (x
, 0), 0), 8));
13663 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
13664 if (small_data_operand (x
, GET_MODE (x
)))
13665 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13666 reg_names
[SMALL_DATA_REG
]);
13671 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
13672 x
= XVECEXP (x
, 0, 1);
13673 /* X is a SYMBOL_REF. Write out the name preceded by a
13674 period and without any trailing data in brackets. Used for function
13675 names. If we are configured for System V (or the embedded ABI) on
13676 the PowerPC, do not emit the period, since those systems do not use
13677 TOCs and the like. */
13678 if (!SYMBOL_REF_P (x
))
13680 output_operand_lossage ("invalid %%z value");
13684 /* For macho, check to see if we need a stub. */
13687 const char *name
= XSTR (x
, 0);
13689 if (darwin_symbol_stubs
13690 && MACHOPIC_INDIRECT
13691 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13692 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13694 assemble_name (file
, name
);
13696 else if (!DOT_SYMBOLS
)
13697 assemble_name (file
, XSTR (x
, 0));
13699 rs6000_output_function_entry (file
, XSTR (x
, 0));
13703 /* Like 'L', for last word of TImode/PTImode. */
13705 fputs (reg_names
[REGNO (x
) + 3], file
);
13706 else if (MEM_P (x
))
13708 machine_mode mode
= GET_MODE (x
);
13709 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13710 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13711 output_address (mode
, plus_constant (Pmode
,
13712 XEXP (XEXP (x
, 0), 0), 12));
13713 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13714 output_address (mode
, plus_constant (Pmode
,
13715 XEXP (XEXP (x
, 0), 0), 12));
13717 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
13718 if (small_data_operand (x
, GET_MODE (x
)))
13719 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13720 reg_names
[SMALL_DATA_REG
]);
13724 /* Print AltiVec memory operand. */
13729 gcc_assert (MEM_P (x
));
13733 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
13734 && GET_CODE (tmp
) == AND
13735 && CONST_INT_P (XEXP (tmp
, 1))
13736 && INTVAL (XEXP (tmp
, 1)) == -16)
13737 tmp
= XEXP (tmp
, 0);
13738 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
13739 && GET_CODE (tmp
) == PRE_MODIFY
)
13740 tmp
= XEXP (tmp
, 1);
13742 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
13745 if (GET_CODE (tmp
) != PLUS
13746 || !REG_P (XEXP (tmp
, 0))
13747 || !REG_P (XEXP (tmp
, 1)))
13749 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13753 if (REGNO (XEXP (tmp
, 0)) == 0)
13754 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
13755 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
13757 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
13758 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
13765 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
13766 else if (MEM_P (x
))
13768 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13769 know the width from the mode. */
13770 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
13771 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
13772 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
13773 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13774 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
13775 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
13776 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13777 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
13779 output_address (GET_MODE (x
), XEXP (x
, 0));
13781 else if (toc_relative_expr_p (x
, false,
13782 &tocrel_base_oac
, &tocrel_offset_oac
))
13783 /* This hack along with a corresponding hack in
13784 rs6000_output_addr_const_extra arranges to output addends
13785 where the assembler expects to find them. eg.
13786 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13787 without this hack would be output as "x@toc+4". We
13789 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
13790 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
13791 output_addr_const (file
, XVECEXP (x
, 0, 0));
13792 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
13793 output_addr_const (file
, XVECEXP (x
, 0, 1));
13795 output_addr_const (file
, x
);
13799 if (const char *name
= get_some_local_dynamic_name ())
13800 assemble_name (file
, name
);
13802 output_operand_lossage ("'%%&' used without any "
13803 "local dynamic TLS references");
13807 output_operand_lossage ("invalid %%xn code");
13811 /* Print the address of an operand. */
13814 print_operand_address (FILE *file
, rtx x
)
13817 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
13819 /* Is it a PC-relative address? */
13820 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
13822 HOST_WIDE_INT offset
;
13824 if (GET_CODE (x
) == CONST
)
13827 if (GET_CODE (x
) == PLUS
)
13829 offset
= INTVAL (XEXP (x
, 1));
13835 output_addr_const (file
, x
);
13838 fprintf (file
, "%+" PRId64
, offset
);
13840 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
13841 fprintf (file
, "@got");
13843 fprintf (file
, "@pcrel");
13845 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
13846 || GET_CODE (x
) == LABEL_REF
)
13848 output_addr_const (file
, x
);
13849 if (small_data_operand (x
, GET_MODE (x
)))
13850 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13851 reg_names
[SMALL_DATA_REG
]);
13853 gcc_assert (!TARGET_TOC
);
13855 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
13856 && REG_P (XEXP (x
, 1)))
13858 if (REGNO (XEXP (x
, 0)) == 0)
13859 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
13860 reg_names
[ REGNO (XEXP (x
, 0)) ]);
13862 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
13863 reg_names
[ REGNO (XEXP (x
, 1)) ]);
13865 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
13866 && CONST_INT_P (XEXP (x
, 1)))
13867 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
13868 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
13870 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
13871 && CONSTANT_P (XEXP (x
, 1)))
13873 fprintf (file
, "lo16(");
13874 output_addr_const (file
, XEXP (x
, 1));
13875 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
13879 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
13880 && CONSTANT_P (XEXP (x
, 1)))
13882 output_addr_const (file
, XEXP (x
, 1));
13883 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
13886 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
13888 /* This hack along with a corresponding hack in
13889 rs6000_output_addr_const_extra arranges to output addends
13890 where the assembler expects to find them. eg.
13892 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13893 without this hack would be output as "x@toc+8@l(9)". We
13894 want "x+8@toc@l(9)". */
13895 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
13896 if (GET_CODE (x
) == LO_SUM
)
13897 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
13899 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
13902 output_addr_const (file
, x
);
13905 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13908 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
13910 if (GET_CODE (x
) == UNSPEC
)
13911 switch (XINT (x
, 1))
13913 case UNSPEC_TOCREL
:
13914 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x
, 0, 0))
13915 && REG_P (XVECEXP (x
, 0, 1))
13916 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
13917 output_addr_const (file
, XVECEXP (x
, 0, 0));
13918 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
13920 if (INTVAL (tocrel_offset_oac
) >= 0)
13921 fprintf (file
, "+");
13922 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
13924 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
13927 assemble_name (file
, toc_label_name
);
13930 else if (TARGET_ELF
)
13931 fputs ("@toc", file
);
13935 case UNSPEC_MACHOPIC_OFFSET
:
13936 output_addr_const (file
, XVECEXP (x
, 0, 0));
13938 machopic_output_function_base_name (file
);
13945 /* Target hook for assembling integer objects. The PowerPC version has
13946 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13947 is defined. It also needs to handle DI-mode objects on 64-bit
13951 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
13953 #ifdef RELOCATABLE_NEEDS_FIXUP
13954 /* Special handling for SI values. */
13955 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
13957 static int recurse
= 0;
13959 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13960 the .fixup section. Since the TOC section is already relocated, we
13961 don't need to mark it here. We used to skip the text section, but it
13962 should never be valid for relocated addresses to be placed in the text
13964 if (DEFAULT_ABI
== ABI_V4
13965 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
13966 && in_section
!= toc_section
13968 && !CONST_SCALAR_INT_P (x
)
13974 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
13976 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
13977 fprintf (asm_out_file
, "\t.long\t(");
13978 output_addr_const (asm_out_file
, x
);
13979 fprintf (asm_out_file
, ")@fixup\n");
13980 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
13981 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
13982 fprintf (asm_out_file
, "\t.long\t");
13983 assemble_name (asm_out_file
, buf
);
13984 fprintf (asm_out_file
, "\n\t.previous\n");
13988 /* Remove initial .'s to turn a -mcall-aixdesc function
13989 address into the address of the descriptor, not the function
13991 else if (SYMBOL_REF_P (x
)
13992 && XSTR (x
, 0)[0] == '.'
13993 && DEFAULT_ABI
== ABI_AIX
)
13995 const char *name
= XSTR (x
, 0);
13996 while (*name
== '.')
13999 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
14003 #endif /* RELOCATABLE_NEEDS_FIXUP */
14004 return default_assemble_integer (x
, size
, aligned_p
);
14007 /* Return a template string for assembly to emit when making an
14008 external call. FUNOP is the call mem argument operand number. */
14010 static const char *
14011 rs6000_call_template_1 (rtx
*operands
, unsigned int funop
, bool sibcall
)
14013 /* -Wformat-overflow workaround, without which gcc thinks that %u
14014 might produce 10 digits. */
14015 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14019 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14021 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14022 sprintf (arg
, "(%%%u@tlsgd)", funop
+ 1);
14023 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14024 sprintf (arg
, "(%%&@tlsld)");
14027 /* The magic 32768 offset here corresponds to the offset of
14028 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14030 sprintf (z
, "%%z%u%s", funop
,
14031 (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
== 2
14034 static char str
[32]; /* 1 spare */
14035 if (rs6000_pcrel_p ())
14036 sprintf (str
, "b%s %s@notoc%s", sibcall
? "" : "l", z
, arg
);
14037 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
14038 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14039 sibcall
? "" : "\n\tnop");
14040 else if (DEFAULT_ABI
== ABI_V4
)
14041 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14042 flag_pic
? "@plt" : "");
14044 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
14045 else if (DEFAULT_ABI
== ABI_DARWIN
)
14047 /* The cookie is in operand func+2. */
14048 gcc_checking_assert (GET_CODE (operands
[funop
+ 2]) == CONST_INT
);
14049 int cookie
= INTVAL (operands
[funop
+ 2]);
14050 if (cookie
& CALL_LONG
)
14052 tree funname
= get_identifier (XSTR (operands
[funop
], 0));
14053 tree labelname
= get_prev_label (funname
);
14054 gcc_checking_assert (labelname
&& !sibcall
);
14056 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14057 instruction will reach 'foo', otherwise link as 'bl L42'".
14058 "L42" should be a 'branch island', that will do a far jump to
14059 'foo'. Branch islands are generated in
14060 macho_branch_islands(). */
14061 sprintf (str
, "jbsr %%z%u,%.10s", funop
,
14062 IDENTIFIER_POINTER (labelname
));
14065 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14067 sprintf (str
, "b%s %s%s", sibcall
? "" : "l", z
, arg
);
14071 gcc_unreachable ();
14076 rs6000_call_template (rtx
*operands
, unsigned int funop
)
14078 return rs6000_call_template_1 (operands
, funop
, false);
14082 rs6000_sibcall_template (rtx
*operands
, unsigned int funop
)
14084 return rs6000_call_template_1 (operands
, funop
, true);
14087 /* As above, for indirect calls. */
14089 static const char *
14090 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14093 /* -Wformat-overflow workaround, without which gcc thinks that %u
14094 might produce 10 digits. Note that -Wformat-overflow will not
14095 currently warn here for str[], so do not rely on a warning to
14096 ensure str[] is correctly sized. */
14097 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14099 /* Currently, funop is either 0 or 1. The maximum string is always
14100 a !speculate 64-bit __tls_get_addr call.
14103 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14104 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14106 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14107 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14114 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14115 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14117 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14118 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14125 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14126 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14128 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14129 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14136 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14137 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14139 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14140 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14144 static char str
[160]; /* 8 spare */
14146 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
14148 if (DEFAULT_ABI
== ABI_AIX
)
14151 ptrload
, funop
+ 3);
14153 /* We don't need the extra code to stop indirect call speculation if
14155 bool speculate
= (TARGET_MACHO
14156 || rs6000_speculate_indirect_jumps
14157 || (REG_P (operands
[funop
])
14158 && REGNO (operands
[funop
]) == LR_REGNO
));
14160 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
14162 const char *rel64
= TARGET_64BIT
? "64" : "";
14165 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14167 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14168 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14170 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14171 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14175 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
14176 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14177 && flag_pic
== 2 ? "+32768" : "");
14181 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14182 tls
, rel64
, notoc
, funop
, addend
);
14183 s
+= sprintf (s
, "crset 2\n\t");
14186 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14187 tls
, rel64
, notoc
, funop
, addend
);
14189 else if (!speculate
)
14190 s
+= sprintf (s
, "crset 2\n\t");
14192 if (rs6000_pcrel_p ())
14195 sprintf (s
, "b%%T%ul", funop
);
14197 sprintf (s
, "beq%%T%ul-", funop
);
14199 else if (DEFAULT_ABI
== ABI_AIX
)
14205 funop
, ptrload
, funop
+ 4);
14210 funop
, ptrload
, funop
+ 4);
14212 else if (DEFAULT_ABI
== ABI_ELFv2
)
14218 funop
, ptrload
, funop
+ 3);
14223 funop
, ptrload
, funop
+ 3);
14230 funop
, sibcall
? "" : "l");
14234 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
14240 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
14242 return rs6000_indirect_call_template_1 (operands
, funop
, false);
14246 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
14248 return rs6000_indirect_call_template_1 (operands
, funop
, true);
14252 /* Output indirect call insns. WHICH identifies the type of sequence. */
14254 rs6000_pltseq_template (rtx
*operands
, int which
)
14256 const char *rel64
= TARGET_64BIT
? "64" : "";
14259 if (GET_CODE (operands
[3]) == UNSPEC
)
14261 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
14262 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
14263 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14265 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
14266 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14270 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
14271 static char str
[96]; /* 10 spare */
14272 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
14273 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14274 && flag_pic
== 2 ? "+32768" : "");
14277 case RS6000_PLTSEQ_TOCSAVE
:
14280 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14281 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
14284 case RS6000_PLTSEQ_PLT16_HA
:
14285 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
14288 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14292 "addis %%0,%%1,0\n\t"
14293 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14294 tls
, off
, rel64
, addend
);
14296 case RS6000_PLTSEQ_PLT16_LO
:
14298 "l%s %%0,0(%%1)\n\t"
14299 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14300 TARGET_64BIT
? "d" : "wz",
14301 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
14303 case RS6000_PLTSEQ_MTCTR
:
14306 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14307 tls
, rel64
, addend
);
14309 case RS6000_PLTSEQ_PLT_PCREL34
:
14311 "pl%s %%0,0(0),1\n\t"
14312 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14313 TARGET_64BIT
? "d" : "wz",
14317 gcc_unreachable ();
14323 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14324 /* Emit an assembler directive to set symbol visibility for DECL to
14325 VISIBILITY_TYPE. */
14328 rs6000_assemble_visibility (tree decl
, int vis
)
14333 /* Functions need to have their entry point symbol visibility set as
14334 well as their descriptor symbol visibility. */
14335 if (DEFAULT_ABI
== ABI_AIX
14337 && TREE_CODE (decl
) == FUNCTION_DECL
)
14339 static const char * const visibility_types
[] = {
14340 NULL
, "protected", "hidden", "internal"
14343 const char *name
, *type
;
14345 name
= ((* targetm
.strip_name_encoding
)
14346 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
14347 type
= visibility_types
[vis
];
14349 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
14350 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
14353 default_assemble_visibility (decl
, vis
);
14358 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
14360 /* Reversal of FP compares takes care -- an ordered compare
14361 becomes an unordered compare and vice versa. */
14362 if (mode
== CCFPmode
14363 && (!flag_finite_math_only
14364 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
14365 || code
== UNEQ
|| code
== LTGT
))
14366 return reverse_condition_maybe_unordered (code
);
14368 return reverse_condition (code
);
14371 /* Generate a compare for CODE. Return a brand-new rtx that
14372 represents the result of the compare. */
14375 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
14377 machine_mode comp_mode
;
14378 rtx compare_result
;
14379 enum rtx_code code
= GET_CODE (cmp
);
14380 rtx op0
= XEXP (cmp
, 0);
14381 rtx op1
= XEXP (cmp
, 1);
14383 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
14384 comp_mode
= CCmode
;
14385 else if (FLOAT_MODE_P (mode
))
14386 comp_mode
= CCFPmode
;
14387 else if (code
== GTU
|| code
== LTU
14388 || code
== GEU
|| code
== LEU
)
14389 comp_mode
= CCUNSmode
;
14390 else if ((code
== EQ
|| code
== NE
)
14391 && unsigned_reg_p (op0
)
14392 && (unsigned_reg_p (op1
)
14393 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
14394 /* These are unsigned values, perhaps there will be a later
14395 ordering compare that can be shared with this one. */
14396 comp_mode
= CCUNSmode
;
14398 comp_mode
= CCmode
;
14400 /* If we have an unsigned compare, make sure we don't have a signed value as
14402 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
14403 && INTVAL (op1
) < 0)
14405 op0
= copy_rtx_if_shared (op0
);
14406 op1
= force_reg (GET_MODE (op0
), op1
);
14407 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
14410 /* First, the compare. */
14411 compare_result
= gen_reg_rtx (comp_mode
);
14413 /* IEEE 128-bit support in VSX registers when we do not have hardware
14415 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
14417 rtx libfunc
= NULL_RTX
;
14418 bool check_nan
= false;
14425 libfunc
= optab_libfunc (eq_optab
, mode
);
14430 libfunc
= optab_libfunc (ge_optab
, mode
);
14435 libfunc
= optab_libfunc (le_optab
, mode
);
14440 libfunc
= optab_libfunc (unord_optab
, mode
);
14441 code
= (code
== UNORDERED
) ? NE
: EQ
;
14447 libfunc
= optab_libfunc (ge_optab
, mode
);
14448 code
= (code
== UNGE
) ? GE
: GT
;
14454 libfunc
= optab_libfunc (le_optab
, mode
);
14455 code
= (code
== UNLE
) ? LE
: LT
;
14461 libfunc
= optab_libfunc (eq_optab
, mode
);
14462 code
= (code
= UNEQ
) ? EQ
: NE
;
14466 gcc_unreachable ();
14469 gcc_assert (libfunc
);
14472 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
14473 SImode
, op0
, mode
, op1
, mode
);
14475 /* The library signals an exception for signalling NaNs, so we need to
14476 handle isgreater, etc. by first checking isordered. */
14479 rtx ne_rtx
, normal_dest
, unord_dest
;
14480 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
14481 rtx join_label
= gen_label_rtx ();
14482 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
14483 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
14486 /* Test for either value being a NaN. */
14487 gcc_assert (unord_func
);
14488 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
14489 SImode
, op0
, mode
, op1
, mode
);
14491 /* Set value (0) if either value is a NaN, and jump to the join
14493 dest
= gen_reg_rtx (SImode
);
14494 emit_move_insn (dest
, const1_rtx
);
14495 emit_insn (gen_rtx_SET (unord_cmp
,
14496 gen_rtx_COMPARE (comp_mode
, unord_dest
,
14499 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
14500 emit_jump_insn (gen_rtx_SET (pc_rtx
,
14501 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
14505 /* Do the normal comparison, knowing that the values are not
14507 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
14508 SImode
, op0
, mode
, op1
, mode
);
14510 emit_insn (gen_cstoresi4 (dest
,
14511 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
14513 normal_dest
, const0_rtx
));
14515 /* Join NaN and non-Nan paths. Compare dest against 0. */
14516 emit_label (join_label
);
14520 emit_insn (gen_rtx_SET (compare_result
,
14521 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
14526 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14527 CLOBBERs to match cmptf_internal2 pattern. */
14528 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
14529 && FLOAT128_IBM_P (GET_MODE (op0
))
14530 && TARGET_HARD_FLOAT
)
14531 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
14533 gen_rtx_SET (compare_result
,
14534 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
14535 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14536 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14537 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14538 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14539 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14540 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14541 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14542 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14543 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
14544 else if (GET_CODE (op1
) == UNSPEC
14545 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
14547 rtx op1b
= XVECEXP (op1
, 0, 0);
14548 comp_mode
= CCEQmode
;
14549 compare_result
= gen_reg_rtx (CCEQmode
);
14551 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
14553 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
14556 emit_insn (gen_rtx_SET (compare_result
,
14557 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
14560 validate_condition_mode (code
, GET_MODE (compare_result
));
14562 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
14566 /* Return the diagnostic message string if the binary operation OP is
14567 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14570 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
14574 machine_mode mode1
= TYPE_MODE (type1
);
14575 machine_mode mode2
= TYPE_MODE (type2
);
14577 /* For complex modes, use the inner type. */
14578 if (COMPLEX_MODE_P (mode1
))
14579 mode1
= GET_MODE_INNER (mode1
);
14581 if (COMPLEX_MODE_P (mode2
))
14582 mode2
= GET_MODE_INNER (mode2
);
14584 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14585 double to intermix unless -mfloat128-convert. */
14586 if (mode1
== mode2
)
14589 if (!TARGET_FLOAT128_CVT
)
14591 if ((FLOAT128_IEEE_P (mode1
) && FLOAT128_IBM_P (mode2
))
14592 || (FLOAT128_IBM_P (mode1
) && FLOAT128_IEEE_P (mode2
)))
14593 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
14601 /* Expand floating point conversion to/from __float128 and __ibm128. */
14604 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
14606 machine_mode dest_mode
= GET_MODE (dest
);
14607 machine_mode src_mode
= GET_MODE (src
);
14608 convert_optab cvt
= unknown_optab
;
14609 bool do_move
= false;
14610 rtx libfunc
= NULL_RTX
;
14612 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
14613 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
14617 rtx_2func_t from_df
;
14618 rtx_2func_t from_sf
;
14619 rtx_2func_t from_si_sign
;
14620 rtx_2func_t from_si_uns
;
14621 rtx_2func_t from_di_sign
;
14622 rtx_2func_t from_di_uns
;
14625 rtx_2func_t to_si_sign
;
14626 rtx_2func_t to_si_uns
;
14627 rtx_2func_t to_di_sign
;
14628 rtx_2func_t to_di_uns
;
14629 } hw_conversions
[2] = {
14630 /* convertions to/from KFmode */
14632 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
14633 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
14634 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
14635 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
14636 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
14637 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
14638 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
14639 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
14640 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
14641 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
14642 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
14643 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
14646 /* convertions to/from TFmode */
14648 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
14649 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
14650 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
14651 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
14652 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
14653 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
14654 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
14655 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
14656 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
14657 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
14658 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
14659 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
14663 if (dest_mode
== src_mode
)
14664 gcc_unreachable ();
14666 /* Eliminate memory operations. */
14668 src
= force_reg (src_mode
, src
);
14672 rtx tmp
= gen_reg_rtx (dest_mode
);
14673 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
14674 rs6000_emit_move (dest
, tmp
, dest_mode
);
14678 /* Convert to IEEE 128-bit floating point. */
14679 if (FLOAT128_IEEE_P (dest_mode
))
14681 if (dest_mode
== KFmode
)
14683 else if (dest_mode
== TFmode
)
14686 gcc_unreachable ();
14692 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
14697 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
14703 if (FLOAT128_IBM_P (src_mode
))
14712 cvt
= ufloat_optab
;
14713 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
14717 cvt
= sfloat_optab
;
14718 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
14725 cvt
= ufloat_optab
;
14726 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
14730 cvt
= sfloat_optab
;
14731 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
14736 gcc_unreachable ();
14740 /* Convert from IEEE 128-bit floating point. */
14741 else if (FLOAT128_IEEE_P (src_mode
))
14743 if (src_mode
== KFmode
)
14745 else if (src_mode
== TFmode
)
14748 gcc_unreachable ();
14754 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
14759 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
14765 if (FLOAT128_IBM_P (dest_mode
))
14775 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
14780 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
14788 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
14793 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
14798 gcc_unreachable ();
14802 /* Both IBM format. */
14803 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
14807 gcc_unreachable ();
14809 /* Handle conversion between TFmode/KFmode/IFmode. */
14811 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
14813 /* Handle conversion if we have hardware support. */
14814 else if (TARGET_FLOAT128_HW
&& hw_convert
)
14815 emit_insn ((hw_convert
) (dest
, src
));
14817 /* Call an external function to do the conversion. */
14818 else if (cvt
!= unknown_optab
)
14820 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
14821 gcc_assert (libfunc
!= NULL_RTX
);
14823 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
14826 gcc_assert (dest2
!= NULL_RTX
);
14827 if (!rtx_equal_p (dest
, dest2
))
14828 emit_move_insn (dest
, dest2
);
14832 gcc_unreachable ();
14838 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14839 can be used as that dest register. Return the dest register. */
14842 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
14844 if (op2
== const0_rtx
)
14847 if (GET_CODE (scratch
) == SCRATCH
)
14848 scratch
= gen_reg_rtx (mode
);
14850 if (logical_operand (op2
, mode
))
14851 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
14853 emit_insn (gen_rtx_SET (scratch
,
14854 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
14859 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14860 requires this. The result is mode MODE. */
14862 rs6000_emit_fp_cror (rtx_code code
, machine_mode mode
, rtx x
)
14866 if (code
== LTGT
|| code
== LE
|| code
== UNLT
)
14867 cond
[n
++] = gen_rtx_fmt_ee (LT
, mode
, x
, const0_rtx
);
14868 if (code
== LTGT
|| code
== GE
|| code
== UNGT
)
14869 cond
[n
++] = gen_rtx_fmt_ee (GT
, mode
, x
, const0_rtx
);
14870 if (code
== LE
|| code
== GE
|| code
== UNEQ
)
14871 cond
[n
++] = gen_rtx_fmt_ee (EQ
, mode
, x
, const0_rtx
);
14872 if (code
== UNLT
|| code
== UNGT
|| code
== UNEQ
)
14873 cond
[n
++] = gen_rtx_fmt_ee (UNORDERED
, mode
, x
, const0_rtx
);
14875 gcc_assert (n
== 2);
14877 rtx cc
= gen_reg_rtx (CCEQmode
);
14878 rtx logical
= gen_rtx_IOR (mode
, cond
[0], cond
[1]);
14879 emit_insn (gen_cceq_ior_compare (mode
, cc
, logical
, cond
[0], x
, cond
[1], x
));
14885 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
14887 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
14888 rtx_code cond_code
= GET_CODE (condition_rtx
);
14890 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
14891 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
14893 else if (cond_code
== NE
14894 || cond_code
== GE
|| cond_code
== LE
14895 || cond_code
== GEU
|| cond_code
== LEU
14896 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
14898 rtx not_result
= gen_reg_rtx (CCEQmode
);
14899 rtx not_op
, rev_cond_rtx
;
14900 machine_mode cc_mode
;
14902 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
14904 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
14905 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
14906 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
14907 emit_insn (gen_rtx_SET (not_result
, not_op
));
14908 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
14911 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
14912 if (op_mode
== VOIDmode
)
14913 op_mode
= GET_MODE (XEXP (operands
[1], 1));
14915 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
14917 PUT_MODE (condition_rtx
, DImode
);
14918 convert_move (operands
[0], condition_rtx
, 0);
14922 PUT_MODE (condition_rtx
, SImode
);
14923 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
14927 /* Emit a branch of kind CODE to location LOC. */
14930 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
14932 rtx condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
14933 rtx loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
14934 rtx ite
= gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
, loc_ref
, pc_rtx
);
14935 emit_jump_insn (gen_rtx_SET (pc_rtx
, ite
));
14938 /* Return the string to output a conditional branch to LABEL, which is
14939 the operand template of the label, or NULL if the branch is really a
14940 conditional return.
14942 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14943 condition code register and its mode specifies what kind of
14944 comparison we made.
14946 REVERSED is nonzero if we should reverse the sense of the comparison.
14948 INSN is the insn. */
14951 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
14953 static char string
[64];
14954 enum rtx_code code
= GET_CODE (op
);
14955 rtx cc_reg
= XEXP (op
, 0);
14956 machine_mode mode
= GET_MODE (cc_reg
);
14957 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
14958 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
14959 int really_reversed
= reversed
^ need_longbranch
;
14965 validate_condition_mode (code
, mode
);
14967 /* Work out which way this really branches. We could use
14968 reverse_condition_maybe_unordered here always but this
14969 makes the resulting assembler clearer. */
14970 if (really_reversed
)
14972 /* Reversal of FP compares takes care -- an ordered compare
14973 becomes an unordered compare and vice versa. */
14974 if (mode
== CCFPmode
)
14975 code
= reverse_condition_maybe_unordered (code
);
14977 code
= reverse_condition (code
);
14982 /* Not all of these are actually distinct opcodes, but
14983 we distinguish them for clarity of the resulting assembler. */
14984 case NE
: case LTGT
:
14985 ccode
= "ne"; break;
14986 case EQ
: case UNEQ
:
14987 ccode
= "eq"; break;
14989 ccode
= "ge"; break;
14990 case GT
: case GTU
: case UNGT
:
14991 ccode
= "gt"; break;
14993 ccode
= "le"; break;
14994 case LT
: case LTU
: case UNLT
:
14995 ccode
= "lt"; break;
14996 case UNORDERED
: ccode
= "un"; break;
14997 case ORDERED
: ccode
= "nu"; break;
14998 case UNGE
: ccode
= "nl"; break;
14999 case UNLE
: ccode
= "ng"; break;
15001 gcc_unreachable ();
15004 /* Maybe we have a guess as to how likely the branch is. */
15006 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
15007 if (note
!= NULL_RTX
)
15009 /* PROB is the difference from 50%. */
15010 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
15011 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
15013 /* Only hint for highly probable/improbable branches on newer cpus when
15014 we have real profile data, as static prediction overrides processor
15015 dynamic prediction. For older cpus we may as well always hint, but
15016 assume not taken for branches that are very close to 50% as a
15017 mispredicted taken branch is more expensive than a
15018 mispredicted not-taken branch. */
15019 if (rs6000_always_hint
15020 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
15021 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
15022 && br_prob_note_reliable_p (note
)))
15024 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
15025 && ((prob
> 0) ^ need_longbranch
))
15033 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
15035 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
15037 /* We need to escape any '%' characters in the reg_names string.
15038 Assume they'd only be the first character.... */
15039 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
15041 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
15045 /* If the branch distance was too far, we may have to use an
15046 unconditional branch to go the distance. */
15047 if (need_longbranch
)
15048 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
15050 s
+= sprintf (s
, ",%s", label
);
15056 /* Return insn for VSX or Altivec comparisons. */
15059 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
15062 machine_mode mode
= GET_MODE (op0
);
15070 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15081 mask
= gen_reg_rtx (mode
);
15082 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15089 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15090 DMODE is expected destination mode. This is a recursive function. */
15093 rs6000_emit_vector_compare (enum rtx_code rcode
,
15095 machine_mode dmode
)
15098 bool swap_operands
= false;
15099 bool try_again
= false;
15101 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
15102 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
15104 /* See if the comparison works as is. */
15105 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15113 swap_operands
= true;
15118 swap_operands
= true;
15126 /* Invert condition and try again.
15127 e.g., A != B becomes ~(A==B). */
15129 enum rtx_code rev_code
;
15130 enum insn_code nor_code
;
15133 rev_code
= reverse_condition_maybe_unordered (rcode
);
15134 if (rev_code
== UNKNOWN
)
15137 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
15138 if (nor_code
== CODE_FOR_nothing
)
15141 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
15145 mask
= gen_reg_rtx (dmode
);
15146 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
15154 /* Try GT/GTU/LT/LTU OR EQ */
15157 enum insn_code ior_code
;
15158 enum rtx_code new_code
;
15179 gcc_unreachable ();
15182 ior_code
= optab_handler (ior_optab
, dmode
);
15183 if (ior_code
== CODE_FOR_nothing
)
15186 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
15190 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
15194 mask
= gen_reg_rtx (dmode
);
15195 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
15206 std::swap (op0
, op1
);
15208 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15213 /* You only get two chances. */
15217 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15218 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15219 operands for the relation operation COND. */
15222 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
15223 rtx cond
, rtx cc_op0
, rtx cc_op1
)
15225 machine_mode dest_mode
= GET_MODE (dest
);
15226 machine_mode mask_mode
= GET_MODE (cc_op0
);
15227 enum rtx_code rcode
= GET_CODE (cond
);
15228 machine_mode cc_mode
= CCmode
;
15231 bool invert_move
= false;
15233 if (VECTOR_UNIT_NONE_P (dest_mode
))
15236 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
15237 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
15241 /* Swap operands if we can, and fall back to doing the operation as
15242 specified, and doing a NOR to invert the test. */
15248 /* Invert condition and try again.
15249 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15250 invert_move
= true;
15251 rcode
= reverse_condition_maybe_unordered (rcode
);
15252 if (rcode
== UNKNOWN
)
15258 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
15260 /* Invert condition to avoid compound test. */
15261 invert_move
= true;
15262 rcode
= reverse_condition (rcode
);
15270 /* Mark unsigned tests with CCUNSmode. */
15271 cc_mode
= CCUNSmode
;
15273 /* Invert condition to avoid compound test if necessary. */
15274 if (rcode
== GEU
|| rcode
== LEU
)
15276 invert_move
= true;
15277 rcode
= reverse_condition (rcode
);
15285 /* Get the vector mask for the given relational operations. */
15286 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
15292 std::swap (op_true
, op_false
);
15294 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
15295 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
15296 && (GET_CODE (op_true
) == CONST_VECTOR
15297 || GET_CODE (op_false
) == CONST_VECTOR
))
15299 rtx constant_0
= CONST0_RTX (dest_mode
);
15300 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
15302 if (op_true
== constant_m1
&& op_false
== constant_0
)
15304 emit_move_insn (dest
, mask
);
15308 else if (op_true
== constant_0
&& op_false
== constant_m1
)
15310 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
15314 /* If we can't use the vector comparison directly, perhaps we can use
15315 the mask for the true or false fields, instead of loading up a
15317 if (op_true
== constant_m1
)
15320 if (op_false
== constant_0
)
15324 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
15325 op_true
= force_reg (dest_mode
, op_true
);
15327 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
15328 op_false
= force_reg (dest_mode
, op_false
);
15330 cond2
= gen_rtx_fmt_ee (NE
, cc_mode
, gen_lowpart (dest_mode
, mask
),
15331 CONST0_RTX (dest_mode
));
15332 emit_insn (gen_rtx_SET (dest
,
15333 gen_rtx_IF_THEN_ELSE (dest_mode
,
15340 /* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or
15341 minimum with "C" semantics.
15343 Unless you use -ffast-math, you can't use these instructions to replace
15344 conditions that implicitly reverse the condition because the comparison
15345 might generate a NaN or signed zer0.
15347 I.e. the following can be replaced all of the time
15348 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15349 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15350 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15351 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15353 The following can be replaced only if -ffast-math is used:
15354 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15355 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15356 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15357 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15359 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15360 nonzero/true, FALSE_COND if it is zero/false.
15362 Return false if we can't generate the appropriate minimum or maximum, and
15363 true if we can did the minimum or maximum. */
15366 rs6000_maybe_emit_maxc_minc (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15368 enum rtx_code code
= GET_CODE (op
);
15369 rtx op0
= XEXP (op
, 0);
15370 rtx op1
= XEXP (op
, 1);
15371 machine_mode compare_mode
= GET_MODE (op0
);
15372 machine_mode result_mode
= GET_MODE (dest
);
15373 bool max_p
= false;
15375 if (result_mode
!= compare_mode
)
15378 if (code
== GE
|| code
== GT
)
15380 else if (code
== LE
|| code
== LT
)
15385 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
15388 /* Only when NaNs and signed-zeros are not in effect, smax could be
15389 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15390 `op0 > op1 ? op1 : op0`. */
15391 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
)
15392 && !HONOR_NANS (compare_mode
) && !HONOR_SIGNED_ZEROS (compare_mode
))
15398 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
15402 /* Possibly emit a floating point conditional move by generating a compare that
15403 sets a mask instruction and a XXSEL select instruction.
15405 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15406 nonzero/true, FALSE_COND if it is zero/false.
15408 Return false if the operation cannot be generated, and true if we could
15409 generate the instruction. */
15412 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15414 enum rtx_code code
= GET_CODE (op
);
15415 rtx op0
= XEXP (op
, 0);
15416 rtx op1
= XEXP (op
, 1);
15417 machine_mode result_mode
= GET_MODE (dest
);
15422 if (!can_create_pseudo_p ())
15435 code
= swap_condition (code
);
15436 std::swap (op0
, op1
);
15443 /* Generate: [(parallel [(set (dest)
15444 (if_then_else (op (cmp1) (cmp2))
15447 (clobber (scratch))])]. */
15449 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
15450 cmove_rtx
= gen_rtx_SET (dest
,
15451 gen_rtx_IF_THEN_ELSE (result_mode
,
15456 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
15457 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15458 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
15463 /* Helper function to return true if the target has instructions to do a
15464 compare and set mask instruction that can be used with XXSEL to implement a
15465 conditional move. It is also assumed that such a target also supports the
15466 "C" minimum and maximum instructions. */
15469 have_compare_and_set_mask (machine_mode mode
)
15475 return TARGET_P9_MINMAX
;
15484 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15485 operands of the last comparison is nonzero/true, FALSE_COND if it
15486 is zero/false. Return 0 if the hardware has no such operation. */
15489 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15491 enum rtx_code code
= GET_CODE (op
);
15492 rtx op0
= XEXP (op
, 0);
15493 rtx op1
= XEXP (op
, 1);
15494 machine_mode compare_mode
= GET_MODE (op0
);
15495 machine_mode result_mode
= GET_MODE (dest
);
15497 bool is_against_zero
;
15499 /* These modes should always match. */
15500 if (GET_MODE (op1
) != compare_mode
15501 /* In the isel case however, we can use a compare immediate, so
15502 op1 may be a small constant. */
15503 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
15505 if (GET_MODE (true_cond
) != result_mode
)
15507 if (GET_MODE (false_cond
) != result_mode
)
15510 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15512 if (have_compare_and_set_mask (compare_mode
)
15513 && have_compare_and_set_mask (result_mode
))
15515 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
15518 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
15522 /* Don't allow using floating point comparisons for integer results for
15524 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
15527 /* First, work out if the hardware can do this at all, or
15528 if it's too slow.... */
15529 if (!FLOAT_MODE_P (compare_mode
))
15532 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
15536 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
15538 /* A floating-point subtract might overflow, underflow, or produce
15539 an inexact result, thus changing the floating-point flags, so it
15540 can't be generated if we care about that. It's safe if one side
15541 of the construct is zero, since then no subtract will be
15543 if (SCALAR_FLOAT_MODE_P (compare_mode
)
15544 && flag_trapping_math
&& ! is_against_zero
)
15547 /* Eliminate half of the comparisons by switching operands, this
15548 makes the remaining code simpler. */
15549 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
15550 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
15552 code
= reverse_condition_maybe_unordered (code
);
15554 true_cond
= false_cond
;
15558 /* UNEQ and LTGT take four instructions for a comparison with zero,
15559 it'll probably be faster to use a branch here too. */
15560 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
15563 /* We're going to try to implement comparisons by performing
15564 a subtract, then comparing against zero. Unfortunately,
15565 Inf - Inf is NaN which is not zero, and so if we don't
15566 know that the operand is finite and the comparison
15567 would treat EQ different to UNORDERED, we can't do it. */
15568 if (HONOR_INFINITIES (compare_mode
)
15569 && code
!= GT
&& code
!= UNGE
15570 && (!CONST_DOUBLE_P (op1
)
15571 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
15572 /* Constructs of the form (a OP b ? a : b) are safe. */
15573 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
15574 || (! rtx_equal_p (op0
, true_cond
)
15575 && ! rtx_equal_p (op1
, true_cond
))))
15578 /* At this point we know we can use fsel. */
15580 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15581 is no fsel instruction. */
15582 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
15585 /* Reduce the comparison to a comparison against zero. */
15586 if (! is_against_zero
)
15588 temp
= gen_reg_rtx (compare_mode
);
15589 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
15591 op1
= CONST0_RTX (compare_mode
);
15594 /* If we don't care about NaNs we can reduce some of the comparisons
15595 down to faster ones. */
15596 if (! HONOR_NANS (compare_mode
))
15602 true_cond
= false_cond
;
15615 /* Now, reduce everything down to a GE. */
15622 temp
= gen_reg_rtx (compare_mode
);
15623 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
15628 temp
= gen_reg_rtx (compare_mode
);
15629 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
15634 temp
= gen_reg_rtx (compare_mode
);
15635 emit_insn (gen_rtx_SET (temp
,
15636 gen_rtx_NEG (compare_mode
,
15637 gen_rtx_ABS (compare_mode
, op0
))));
15642 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15643 temp
= gen_reg_rtx (result_mode
);
15644 emit_insn (gen_rtx_SET (temp
,
15645 gen_rtx_IF_THEN_ELSE (result_mode
,
15646 gen_rtx_GE (VOIDmode
,
15648 true_cond
, false_cond
)));
15649 false_cond
= true_cond
;
15652 temp
= gen_reg_rtx (compare_mode
);
15653 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
15658 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15659 temp
= gen_reg_rtx (result_mode
);
15660 emit_insn (gen_rtx_SET (temp
,
15661 gen_rtx_IF_THEN_ELSE (result_mode
,
15662 gen_rtx_GE (VOIDmode
,
15664 true_cond
, false_cond
)));
15665 true_cond
= false_cond
;
15668 temp
= gen_reg_rtx (compare_mode
);
15669 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
15674 gcc_unreachable ();
15677 emit_insn (gen_rtx_SET (dest
,
15678 gen_rtx_IF_THEN_ELSE (result_mode
,
15679 gen_rtx_GE (VOIDmode
,
15681 true_cond
, false_cond
)));
15685 /* Same as above, but for ints (isel). */
15688 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15690 rtx condition_rtx
, cr
;
15691 machine_mode mode
= GET_MODE (dest
);
15692 enum rtx_code cond_code
;
15693 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
15696 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
15699 /* We still have to do the compare, because isel doesn't do a
15700 compare, it just looks at the CRx bits set by a previous compare
15702 condition_rtx
= rs6000_generate_compare (op
, mode
);
15703 cond_code
= GET_CODE (condition_rtx
);
15704 cr
= XEXP (condition_rtx
, 0);
15705 signedp
= GET_MODE (cr
) == CCmode
;
15707 isel_func
= (mode
== SImode
15708 ? (signedp
? gen_isel_signed_si
: gen_isel_unsigned_si
)
15709 : (signedp
? gen_isel_signed_di
: gen_isel_unsigned_di
));
15713 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
15714 /* isel handles these directly. */
15718 /* We need to swap the sense of the comparison. */
15720 std::swap (false_cond
, true_cond
);
15721 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
15726 false_cond
= force_reg (mode
, false_cond
);
15727 if (true_cond
!= const0_rtx
)
15728 true_cond
= force_reg (mode
, true_cond
);
15730 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
15736 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
15738 machine_mode mode
= GET_MODE (op0
);
15742 /* VSX/altivec have direct min/max insns. */
15743 if ((code
== SMAX
|| code
== SMIN
)
15744 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
15745 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))))
15747 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15751 if (code
== SMAX
|| code
== SMIN
)
15756 if (code
== SMAX
|| code
== UMAX
)
15757 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
15758 op0
, op1
, mode
, 0);
15760 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
15761 op1
, op0
, mode
, 0);
15762 gcc_assert (target
);
15763 if (target
!= dest
)
15764 emit_move_insn (dest
, target
);
15767 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15768 COND is true. Mark the jump as unlikely to be taken. */
15771 emit_unlikely_jump (rtx cond
, rtx label
)
15773 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
15774 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
15775 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
15778 /* A subroutine of the atomic operation splitters. Emit a load-locked
15779 instruction in MODE. For QI/HImode, possibly use a pattern than includes
15780 the zero_extend operation. */
15783 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
15785 rtx (*fn
) (rtx
, rtx
) = NULL
;
15790 fn
= gen_load_lockedqi
;
15793 fn
= gen_load_lockedhi
;
15796 if (GET_MODE (mem
) == QImode
)
15797 fn
= gen_load_lockedqi_si
;
15798 else if (GET_MODE (mem
) == HImode
)
15799 fn
= gen_load_lockedhi_si
;
15801 fn
= gen_load_lockedsi
;
15804 fn
= gen_load_lockeddi
;
15807 fn
= gen_load_lockedti
;
15810 gcc_unreachable ();
15812 emit_insn (fn (reg
, mem
));
15815 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15816 instruction in MODE. */
15819 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
15821 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
15826 fn
= gen_store_conditionalqi
;
15829 fn
= gen_store_conditionalhi
;
15832 fn
= gen_store_conditionalsi
;
15835 fn
= gen_store_conditionaldi
;
15838 fn
= gen_store_conditionalti
;
15841 gcc_unreachable ();
15844 /* Emit sync before stwcx. to address PPC405 Erratum. */
15845 if (PPC405_ERRATUM77
)
15846 emit_insn (gen_hwsync ());
15848 emit_insn (fn (res
, mem
, val
));
15851 /* Expand barriers before and after a load_locked/store_cond sequence. */
15854 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
15856 rtx addr
= XEXP (mem
, 0);
15858 if (!legitimate_indirect_address_p (addr
, reload_completed
)
15859 && !legitimate_indexed_address_p (addr
, reload_completed
))
15861 addr
= force_reg (Pmode
, addr
);
15862 mem
= replace_equiv_address_nv (mem
, addr
);
15867 case MEMMODEL_RELAXED
:
15868 case MEMMODEL_CONSUME
:
15869 case MEMMODEL_ACQUIRE
:
15871 case MEMMODEL_RELEASE
:
15872 case MEMMODEL_ACQ_REL
:
15873 emit_insn (gen_lwsync ());
15875 case MEMMODEL_SEQ_CST
:
15876 emit_insn (gen_hwsync ());
15879 gcc_unreachable ();
15885 rs6000_post_atomic_barrier (enum memmodel model
)
15889 case MEMMODEL_RELAXED
:
15890 case MEMMODEL_CONSUME
:
15891 case MEMMODEL_RELEASE
:
15893 case MEMMODEL_ACQUIRE
:
15894 case MEMMODEL_ACQ_REL
:
15895 case MEMMODEL_SEQ_CST
:
15896 emit_insn (gen_isync ());
15899 gcc_unreachable ();
15903 /* A subroutine of the various atomic expanders. For sub-word operations,
15904 we must adjust things to operate on SImode. Given the original MEM,
15905 return a new aligned memory. Also build and return the quantities by
15906 which to shift and mask. */
15909 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
15911 rtx addr
, align
, shift
, mask
, mem
;
15912 HOST_WIDE_INT shift_mask
;
15913 machine_mode mode
= GET_MODE (orig_mem
);
15915 /* For smaller modes, we have to implement this via SImode. */
15916 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
15918 addr
= XEXP (orig_mem
, 0);
15919 addr
= force_reg (GET_MODE (addr
), addr
);
15921 /* Aligned memory containing subword. Generate a new memory. We
15922 do not want any of the existing MEM_ATTR data, as we're now
15923 accessing memory outside the original object. */
15924 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
15925 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
15926 mem
= gen_rtx_MEM (SImode
, align
);
15927 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
15928 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
15929 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
15931 /* Shift amount for subword relative to aligned word. */
15932 shift
= gen_reg_rtx (SImode
);
15933 addr
= gen_lowpart (SImode
, addr
);
15934 rtx tmp
= gen_reg_rtx (SImode
);
15935 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
15936 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
15937 if (BYTES_BIG_ENDIAN
)
15938 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
15939 shift
, 1, OPTAB_LIB_WIDEN
);
15942 /* Mask for insertion. */
15943 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
15944 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
15950 /* A subroutine of the various atomic expanders. For sub-word operands,
15951 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
15954 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
15958 x
= gen_reg_rtx (SImode
);
15959 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
15960 gen_rtx_NOT (SImode
, mask
),
15963 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
15968 /* A subroutine of the various atomic expanders. For sub-word operands,
15969 extract WIDE to NARROW via SHIFT. */
15972 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
15974 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
15975 wide
, 1, OPTAB_LIB_WIDEN
);
15976 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
15979 /* Expand an atomic compare and swap operation. */
15982 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
15984 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
15985 rtx label1
, label2
, x
, mask
, shift
;
15986 machine_mode mode
, orig_mode
;
15987 enum memmodel mod_s
, mod_f
;
15990 boolval
= operands
[0];
15991 retval
= operands
[1];
15993 oldval
= operands
[3];
15994 newval
= operands
[4];
15995 is_weak
= (INTVAL (operands
[5]) != 0);
15996 mod_s
= memmodel_base (INTVAL (operands
[6]));
15997 mod_f
= memmodel_base (INTVAL (operands
[7]));
15998 orig_mode
= mode
= GET_MODE (mem
);
16000 mask
= shift
= NULL_RTX
;
16001 if (mode
== QImode
|| mode
== HImode
)
16003 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16004 lwarx and shift/mask operations. With power8, we need to do the
16005 comparison in SImode, but the store is still done in QI/HImode. */
16006 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
16008 if (!TARGET_SYNC_HI_QI
)
16010 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16012 /* Shift and mask OLDVAL into position with the word. */
16013 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
16014 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16016 /* Shift and mask NEWVAL into position within the word. */
16017 newval
= convert_modes (SImode
, mode
, newval
, 1);
16018 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
16019 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16022 /* Prepare to adjust the return value. */
16023 retval
= gen_reg_rtx (SImode
);
16026 else if (reg_overlap_mentioned_p (retval
, oldval
))
16027 oldval
= copy_to_reg (oldval
);
16029 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
16030 oldval
= copy_to_mode_reg (mode
, oldval
);
16032 if (reg_overlap_mentioned_p (retval
, newval
))
16033 newval
= copy_to_reg (newval
);
16035 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
16040 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16041 emit_label (XEXP (label1
, 0));
16043 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16045 emit_load_locked (mode
, retval
, mem
);
16049 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
16050 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16052 cond
= gen_reg_rtx (CCmode
);
16053 /* If we have TImode, synthesize a comparison. */
16054 if (mode
!= TImode
)
16055 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
16058 rtx xor1_result
= gen_reg_rtx (DImode
);
16059 rtx xor2_result
= gen_reg_rtx (DImode
);
16060 rtx or_result
= gen_reg_rtx (DImode
);
16061 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
16062 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
16063 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
16064 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
16066 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
16067 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
16068 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
16069 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
16072 emit_insn (gen_rtx_SET (cond
, x
));
16074 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16075 emit_unlikely_jump (x
, label2
);
16079 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
16081 emit_store_conditional (orig_mode
, cond
, mem
, x
);
16085 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16086 emit_unlikely_jump (x
, label1
);
16089 if (!is_mm_relaxed (mod_f
))
16090 emit_label (XEXP (label2
, 0));
16092 rs6000_post_atomic_barrier (mod_s
);
16094 if (is_mm_relaxed (mod_f
))
16095 emit_label (XEXP (label2
, 0));
16098 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
16099 else if (mode
!= GET_MODE (operands
[1]))
16100 convert_move (operands
[1], retval
, 1);
16102 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16103 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
16104 emit_insn (gen_rtx_SET (boolval
, x
));
16107 /* Expand an atomic exchange operation. */
16110 rs6000_expand_atomic_exchange (rtx operands
[])
16112 rtx retval
, mem
, val
, cond
;
16114 enum memmodel model
;
16115 rtx label
, x
, mask
, shift
;
16117 retval
= operands
[0];
16120 model
= memmodel_base (INTVAL (operands
[3]));
16121 mode
= GET_MODE (mem
);
16123 mask
= shift
= NULL_RTX
;
16124 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
16126 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16128 /* Shift and mask VAL into position with the word. */
16129 val
= convert_modes (SImode
, mode
, val
, 1);
16130 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16131 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16133 /* Prepare to adjust the return value. */
16134 retval
= gen_reg_rtx (SImode
);
16138 mem
= rs6000_pre_atomic_barrier (mem
, model
);
16140 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16141 emit_label (XEXP (label
, 0));
16143 emit_load_locked (mode
, retval
, mem
);
16147 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
16149 cond
= gen_reg_rtx (CCmode
);
16150 emit_store_conditional (mode
, cond
, mem
, x
);
16152 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16153 emit_unlikely_jump (x
, label
);
16155 rs6000_post_atomic_barrier (model
);
16158 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
16161 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16162 to perform. MEM is the memory on which to operate. VAL is the second
16163 operand of the binary operator. BEFORE and AFTER are optional locations to
16164 return the value of MEM either before of after the operation. MODEL_RTX
16165 is a CONST_INT containing the memory model to use. */
16168 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
16169 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
16171 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
16172 machine_mode mode
= GET_MODE (mem
);
16173 machine_mode store_mode
= mode
;
16174 rtx label
, x
, cond
, mask
, shift
;
16175 rtx before
= orig_before
, after
= orig_after
;
16177 mask
= shift
= NULL_RTX
;
16178 /* On power8, we want to use SImode for the operation. On previous systems,
16179 use the operation in a subword and shift/mask to get the proper byte or
16181 if (mode
== QImode
|| mode
== HImode
)
16183 if (TARGET_SYNC_HI_QI
)
16185 val
= convert_modes (SImode
, mode
, val
, 1);
16187 /* Prepare to adjust the return value. */
16188 before
= gen_reg_rtx (SImode
);
16190 after
= gen_reg_rtx (SImode
);
16195 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16197 /* Shift and mask VAL into position with the word. */
16198 val
= convert_modes (SImode
, mode
, val
, 1);
16199 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16200 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16206 /* We've already zero-extended VAL. That is sufficient to
16207 make certain that it does not affect other bits. */
16212 /* If we make certain that all of the other bits in VAL are
16213 set, that will be sufficient to not affect other bits. */
16214 x
= gen_rtx_NOT (SImode
, mask
);
16215 x
= gen_rtx_IOR (SImode
, x
, val
);
16216 emit_insn (gen_rtx_SET (val
, x
));
16223 /* These will all affect bits outside the field and need
16224 adjustment via MASK within the loop. */
16228 gcc_unreachable ();
16231 /* Prepare to adjust the return value. */
16232 before
= gen_reg_rtx (SImode
);
16234 after
= gen_reg_rtx (SImode
);
16235 store_mode
= mode
= SImode
;
16239 mem
= rs6000_pre_atomic_barrier (mem
, model
);
16241 label
= gen_label_rtx ();
16242 emit_label (label
);
16243 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
16245 if (before
== NULL_RTX
)
16246 before
= gen_reg_rtx (mode
);
16248 emit_load_locked (mode
, before
, mem
);
16252 x
= expand_simple_binop (mode
, AND
, before
, val
,
16253 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16254 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
16258 after
= expand_simple_binop (mode
, code
, before
, val
,
16259 after
, 1, OPTAB_LIB_WIDEN
);
16265 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
16266 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16267 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
16269 else if (store_mode
!= mode
)
16270 x
= convert_modes (store_mode
, mode
, x
, 1);
16272 cond
= gen_reg_rtx (CCmode
);
16273 emit_store_conditional (store_mode
, cond
, mem
, x
);
16275 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16276 emit_unlikely_jump (x
, label
);
16278 rs6000_post_atomic_barrier (model
);
16282 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16283 then do the calcuations in a SImode register. */
16285 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
16287 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
16289 else if (store_mode
!= mode
)
16291 /* QImode/HImode on machines with lbarx/lharx where we do the native
16292 operation and then do the calcuations in a SImode register. */
16294 convert_move (orig_before
, before
, 1);
16296 convert_move (orig_after
, after
, 1);
16298 else if (orig_after
&& after
!= orig_after
)
16299 emit_move_insn (orig_after
, after
);
16302 /* Emit instructions to move SRC to DST. Called by splitters for
16303 multi-register moves. It will emit at most one instruction for
16304 each register that is accessed; that is, it won't emit li/lis pairs
16305 (or equivalent for 64-bit code). One of SRC or DST must be a hard
16309 rs6000_split_multireg_move (rtx dst
, rtx src
)
16311 /* The register number of the first register being moved. */
16313 /* The mode that is to be moved. */
16315 /* The mode that the move is being done in, and its size. */
16316 machine_mode reg_mode
;
16318 /* The number of registers that will be moved. */
16321 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
16322 mode
= GET_MODE (dst
);
16323 nregs
= hard_regno_nregs (reg
, mode
);
16325 /* If we have a vector quad register for MMA, and this is a load or store,
16326 see if we can use vector paired load/stores. */
16327 if (mode
== XOmode
&& TARGET_MMA
16328 && (MEM_P (dst
) || MEM_P (src
)))
16333 /* If we have a vector pair/quad mode, split it into two/four separate
16335 else if (mode
== OOmode
|| mode
== XOmode
)
16336 reg_mode
= V1TImode
;
16337 else if (FP_REGNO_P (reg
))
16338 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
16339 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
16340 else if (ALTIVEC_REGNO_P (reg
))
16341 reg_mode
= V16QImode
;
16343 reg_mode
= word_mode
;
16344 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
16346 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
16348 /* TDmode residing in FP registers is special, since the ISA requires that
16349 the lower-numbered word of a register pair is always the most significant
16350 word, even in little-endian mode. This does not match the usual subreg
16351 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
16352 the appropriate constituent registers "by hand" in little-endian mode.
16354 Note we do not need to check for destructive overlap here since TDmode
16355 can only reside in even/odd register pairs. */
16356 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
16361 for (i
= 0; i
< nregs
; i
++)
16363 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
16364 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
16366 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
16367 i
* reg_mode_size
);
16369 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
16370 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
16372 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
16373 i
* reg_mode_size
);
16375 emit_insn (gen_rtx_SET (p_dst
, p_src
));
16381 /* The __vector_pair and __vector_quad modes are multi-register
16382 modes, so if we have to load or store the registers, we have to be
16383 careful to properly swap them if we're in little endian mode
16384 below. This means the last register gets the first memory
16385 location. We also need to be careful of using the right register
16386 numbers if we are splitting XO to OO. */
16387 if (mode
== OOmode
|| mode
== XOmode
)
16389 nregs
= hard_regno_nregs (reg
, mode
);
16390 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
16393 unsigned offset
= 0;
16394 unsigned size
= GET_MODE_SIZE (reg_mode
);
16396 /* If we are reading an accumulator register, we have to
16397 deprime it before we can access it. */
16399 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
16400 emit_insn (gen_mma_xxmfacc (src
, src
));
16402 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
16405 (WORDS_BIG_ENDIAN
) ? i
: (nregs
- reg_mode_nregs
- i
);
16406 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
16407 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
16409 emit_insn (gen_rtx_SET (dst2
, src2
));
16417 unsigned offset
= 0;
16418 unsigned size
= GET_MODE_SIZE (reg_mode
);
16420 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
16423 (WORDS_BIG_ENDIAN
) ? i
: (nregs
- reg_mode_nregs
- i
);
16424 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
16425 rtx src2
= adjust_address (src
, reg_mode
, offset
);
16427 emit_insn (gen_rtx_SET (dst2
, src2
));
16430 /* If we are writing an accumulator register, we have to
16431 prime it after we've written it. */
16433 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
16434 emit_insn (gen_mma_xxmtacc (dst
, dst
));
16439 if (GET_CODE (src
) == UNSPEC
)
16441 gcc_assert (XINT (src
, 1) == UNSPEC_MMA_ASSEMBLE
);
16442 gcc_assert (REG_P (dst
));
16443 if (GET_MODE (src
) == XOmode
)
16444 gcc_assert (FP_REGNO_P (REGNO (dst
)));
16445 if (GET_MODE (src
) == OOmode
)
16446 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
16448 reg_mode
= GET_MODE (XVECEXP (src
, 0, 0));
16449 for (int i
= 0; i
< XVECLEN (src
, 0); i
++)
16451 rtx dst_i
= gen_rtx_REG (reg_mode
, reg
+ i
);
16452 emit_insn (gen_rtx_SET (dst_i
, XVECEXP (src
, 0, i
)));
16455 /* We are writing an accumulator register, so we have to
16456 prime it after we've written it. */
16457 if (GET_MODE (src
) == XOmode
)
16458 emit_insn (gen_mma_xxmtacc (dst
, dst
));
16463 /* Register -> register moves can use common code. */
16466 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
16468 /* If we are reading an accumulator register, we have to
16469 deprime it before we can access it. */
16471 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
16472 emit_insn (gen_mma_xxmfacc (src
, src
));
16474 /* Move register range backwards, if we might have destructive
16477 /* XO/OO are opaque so cannot use subregs. */
16478 if (mode
== OOmode
|| mode
== XOmode
)
16480 for (i
= nregs
- 1; i
>= 0; i
--)
16482 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
16483 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
16484 emit_insn (gen_rtx_SET (dst_i
, src_i
));
16489 for (i
= nregs
- 1; i
>= 0; i
--)
16490 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
16491 i
* reg_mode_size
),
16492 simplify_gen_subreg (reg_mode
, src
, mode
,
16493 i
* reg_mode_size
)));
16496 /* If we are writing an accumulator register, we have to
16497 prime it after we've written it. */
16499 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
16500 emit_insn (gen_mma_xxmtacc (dst
, dst
));
16506 bool used_update
= false;
16507 rtx restore_basereg
= NULL_RTX
;
16509 if (MEM_P (src
) && INT_REGNO_P (reg
))
16513 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
16514 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
16517 breg
= XEXP (XEXP (src
, 0), 0);
16518 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
16519 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
16520 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
16521 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
16522 src
= replace_equiv_address (src
, breg
);
16524 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
16526 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
16528 rtx basereg
= XEXP (XEXP (src
, 0), 0);
16531 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
16532 emit_insn (gen_rtx_SET (ndst
,
16533 gen_rtx_MEM (reg_mode
,
16535 used_update
= true;
16538 emit_insn (gen_rtx_SET (basereg
,
16539 XEXP (XEXP (src
, 0), 1)));
16540 src
= replace_equiv_address (src
, basereg
);
16544 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
16545 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
16546 src
= replace_equiv_address (src
, basereg
);
16550 breg
= XEXP (src
, 0);
16551 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
16552 breg
= XEXP (breg
, 0);
16554 /* If the base register we are using to address memory is
16555 also a destination reg, then change that register last. */
16557 && REGNO (breg
) >= REGNO (dst
)
16558 && REGNO (breg
) < REGNO (dst
) + nregs
)
16559 j
= REGNO (breg
) - REGNO (dst
);
16561 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
16565 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
16566 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
16569 breg
= XEXP (XEXP (dst
, 0), 0);
16570 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
16571 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
16572 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
16574 /* We have to update the breg before doing the store.
16575 Use store with update, if available. */
16579 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
16580 emit_insn (TARGET_32BIT
16581 ? (TARGET_POWERPC64
16582 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
16583 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
16584 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
16585 used_update
= true;
16588 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
16589 dst
= replace_equiv_address (dst
, breg
);
16591 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
16592 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
16594 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
16596 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
16599 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
16600 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
16603 used_update
= true;
16606 emit_insn (gen_rtx_SET (basereg
,
16607 XEXP (XEXP (dst
, 0), 1)));
16608 dst
= replace_equiv_address (dst
, basereg
);
16612 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
16613 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
16614 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
16616 && REG_P (offsetreg
)
16617 && REGNO (basereg
) != REGNO (offsetreg
));
16618 if (REGNO (basereg
) == 0)
16620 rtx tmp
= offsetreg
;
16621 offsetreg
= basereg
;
16624 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
16625 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
16626 dst
= replace_equiv_address (dst
, basereg
);
16629 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
16630 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
16633 /* If we are reading an accumulator register, we have to
16634 deprime it before we can access it. */
16635 if (TARGET_MMA
&& REG_P (src
)
16636 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
16637 emit_insn (gen_mma_xxmfacc (src
, src
));
16639 for (i
= 0; i
< nregs
; i
++)
16641 /* Calculate index to next subword. */
16646 /* If compiler already emitted move of first word by
16647 store with update, no need to do anything. */
16648 if (j
== 0 && used_update
)
16651 /* XO/OO are opaque so cannot use subregs. */
16652 if (mode
== OOmode
|| mode
== XOmode
)
16654 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
16655 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
16656 emit_insn (gen_rtx_SET (dst_i
, src_i
));
16659 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
16660 j
* reg_mode_size
),
16661 simplify_gen_subreg (reg_mode
, src
, mode
,
16662 j
* reg_mode_size
)));
16665 /* If we are writing an accumulator register, we have to
16666 prime it after we've written it. */
16667 if (TARGET_MMA
&& REG_P (dst
)
16668 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
16669 emit_insn (gen_mma_xxmtacc (dst
, dst
));
16671 if (restore_basereg
!= NULL_RTX
)
16672 emit_insn (restore_basereg
);
16676 static GTY(()) alias_set_type TOC_alias_set
= -1;
16679 get_TOC_alias_set (void)
16681 if (TOC_alias_set
== -1)
16682 TOC_alias_set
= new_alias_set ();
16683 return TOC_alias_set
;
16686 /* The mode the ABI uses for a word. This is not the same as word_mode
16687 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16689 static scalar_int_mode
16690 rs6000_abi_word_mode (void)
16692 return TARGET_32BIT
? SImode
: DImode
;
16695 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16697 rs6000_offload_options (void)
16700 return xstrdup ("-foffload-abi=lp64");
16702 return xstrdup ("-foffload-abi=ilp32");
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

	Target		Flags		Name		One table per
	AIX		(none)		AIX TOC		object file
	AIX		-mfull-toc	AIX TOC		object file
	AIX		-mminimal-toc	AIX minimal TOC	translation unit
	SVR4/EABI	(none)		SVR4 SDATA	object file
	SVR4/EABI	-fpic		SVR4 pic	object file
	SVR4/EABI	-fPIC		SVR4 PIC	translation unit
	SVR4/EABI	-mrelocatable	EABI TOC	function
	SVR4/EABI	-maix		AIX TOC		object file
	SVR4/EABI	-maix -mminimal-toc
					AIX minimal TOC	translation unit

	Name		Reg.	Set by	entries	contains:
				made by	addrs?	fp?	sum?

	AIX TOC		2	crt0	as	Y	option	option
	AIX minimal TOC	30	prolog	gcc	Y	Y	option
	SVR4 SDATA	13	crt0	gcc	N	Y	N
	SVR4 pic	30	prolog	ld	Y	not yet	N
	SVR4 PIC	30	prolog	gcc	Y	option	option
	EABI TOC	30	prolog	gcc	Y	option	option  */
16733 /* Hash functions for the hash table. */
16736 rs6000_hash_constant (rtx k
)
16738 enum rtx_code code
= GET_CODE (k
);
16739 machine_mode mode
= GET_MODE (k
);
16740 unsigned result
= (code
<< 3) ^ mode
;
16741 const char *format
;
16744 format
= GET_RTX_FORMAT (code
);
16745 flen
= strlen (format
);
16751 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
16753 case CONST_WIDE_INT
:
16756 flen
= CONST_WIDE_INT_NUNITS (k
);
16757 for (i
= 0; i
< flen
; i
++)
16758 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
16763 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
16773 for (; fidx
< flen
; fidx
++)
16774 switch (format
[fidx
])
16779 const char *str
= XSTR (k
, fidx
);
16780 len
= strlen (str
);
16781 result
= result
* 613 + len
;
16782 for (i
= 0; i
< len
; i
++)
16783 result
= result
* 613 + (unsigned) str
[i
];
16788 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
16792 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
16795 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
16796 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
16800 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
16801 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
16808 gcc_unreachable ();
16815 toc_hasher::hash (toc_hash_struct
*thc
)
16817 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
16820 /* Compare H1 and H2 for equivalence. */
16823 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
16828 if (h1
->key_mode
!= h2
->key_mode
)
16831 return rtx_equal_p (r1
, r2
);
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
   || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
   || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
   || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
   || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.
   Returns NAME unchanged when it contains no '$' past the first
   character.  */
const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
16877 rs6000_output_symbol_ref (FILE *file
, rtx x
)
16879 const char *name
= XSTR (x
, 0);
16881 /* Currently C++ toc references to vtables can be emitted before it
16882 is decided whether the vtable is public or private. If this is
16883 the case, then the linker will eventually complain that there is
16884 a reference to an unknown section. Thus, for vtables only,
16885 we emit the TOC reference to reference the identifier and not the
16887 if (VTABLE_NAME_P (name
))
16889 RS6000_OUTPUT_BASENAME (file
, name
);
16892 assemble_name (file
, name
);
16895 /* Output a TOC entry. We derive the entry name from what is being
16899 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
16902 const char *name
= buf
;
16904 HOST_WIDE_INT offset
= 0;
16906 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
16908 /* When the linker won't eliminate them, don't output duplicate
16909 TOC entries (this happens on AIX if there is any kind of TOC,
16910 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16912 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
16914 struct toc_hash_struct
*h
;
16916 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16917 time because GGC is not initialized at that point. */
16918 if (toc_hash_table
== NULL
)
16919 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
16921 h
= ggc_alloc
<toc_hash_struct
> ();
16923 h
->key_mode
= mode
;
16924 h
->labelno
= labelno
;
16926 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
16927 if (*found
== NULL
)
16929 else /* This is indeed a duplicate.
16930 Set this label equal to that label. */
16932 fputs ("\t.set ", file
);
16933 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
16934 fprintf (file
, "%d,", labelno
);
16935 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
16936 fprintf (file
, "%d\n", ((*found
)->labelno
));
16939 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
16940 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
16941 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
16943 fputs ("\t.set ", file
);
16944 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
16945 fprintf (file
, "%d,", labelno
);
16946 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
16947 fprintf (file
, "%d\n", ((*found
)->labelno
));
16954 /* If we're going to put a double constant in the TOC, make sure it's
16955 aligned properly when strict alignment is on. */
16956 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
16957 && STRICT_ALIGNMENT
16958 && GET_MODE_BITSIZE (mode
) >= 64
16959 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
16960 ASM_OUTPUT_ALIGN (file
, 3);
16963 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
16965 /* Handle FP constants specially. Note that if we have a minimal
16966 TOC, things we put here aren't actually in the TOC, so we can allow
16968 if (CONST_DOUBLE_P (x
)
16969 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
16970 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
16974 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
16975 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
16977 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
16981 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
16982 fputs (DOUBLE_INT_ASM_OP
, file
);
16984 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16985 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
16986 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
16987 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
16988 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
16989 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
16990 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
16991 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
16996 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
16997 fputs ("\t.long ", file
);
16999 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17000 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17001 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17002 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17003 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17004 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17008 else if (CONST_DOUBLE_P (x
)
17009 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17013 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17014 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17016 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17020 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17021 fputs (DOUBLE_INT_ASM_OP
, file
);
17023 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17024 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17025 fprintf (file
, "0x%lx%08lx\n",
17026 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17027 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17032 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17033 fputs ("\t.long ", file
);
17035 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17036 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17037 fprintf (file
, "0x%lx,0x%lx\n",
17038 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17042 else if (CONST_DOUBLE_P (x
)
17043 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17047 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17048 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17050 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17054 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17055 fputs (DOUBLE_INT_ASM_OP
, file
);
17057 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17058 if (WORDS_BIG_ENDIAN
)
17059 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17061 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17066 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17067 fputs ("\t.long ", file
);
17069 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17070 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17074 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17076 unsigned HOST_WIDE_INT low
;
17077 HOST_WIDE_INT high
;
17079 low
= INTVAL (x
) & 0xffffffff;
17080 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17082 /* TOC entries are always Pmode-sized, so when big-endian
17083 smaller integer constants in the TOC need to be padded.
17084 (This is still a win over putting the constants in
17085 a separate constant pool, because then we'd have
17086 to have both a TOC entry _and_ the actual constant.)
17088 For a 32-bit target, CONST_INT values are loaded and shifted
17089 entirely within `low' and can be stored in one TOC entry. */
17091 /* It would be easy to make this work, but it doesn't now. */
17092 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17094 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17097 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17098 high
= (HOST_WIDE_INT
) low
>> 32;
17104 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17105 fputs (DOUBLE_INT_ASM_OP
, file
);
17107 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17108 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17109 fprintf (file
, "0x%lx%08lx\n",
17110 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17115 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17117 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17118 fputs ("\t.long ", file
);
17120 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17121 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17122 fprintf (file
, "0x%lx,0x%lx\n",
17123 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17127 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17128 fputs ("\t.long ", file
);
17130 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17131 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17137 if (GET_CODE (x
) == CONST
)
17139 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17140 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17142 base
= XEXP (XEXP (x
, 0), 0);
17143 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17146 switch (GET_CODE (base
))
17149 name
= XSTR (base
, 0);
17153 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17154 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17158 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17162 gcc_unreachable ();
17165 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17166 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17169 fputs ("\t.tc ", file
);
17170 RS6000_OUTPUT_BASENAME (file
, name
);
17173 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17175 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17177 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17178 after other TOC symbols, reducing overflow of small TOC access
17179 to [TC] symbols. */
17180 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17181 ? "[TE]," : "[TC],", file
);
17184 /* Currently C++ toc references to vtables can be emitted before it
17185 is decided whether the vtable is public or private. If this is
17186 the case, then the linker will eventually complain that there is
17187 a TOC reference to an unknown section. Thus, for vtables only,
17188 we emit the TOC reference to reference the symbol and not the
17190 if (VTABLE_NAME_P (name
))
17192 RS6000_OUTPUT_BASENAME (file
, name
);
17194 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17195 else if (offset
> 0)
17196 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17199 output_addr_const (file
, x
);
17202 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17204 switch (SYMBOL_REF_TLS_MODEL (base
))
17208 case TLS_MODEL_LOCAL_EXEC
:
17209 fputs ("@le", file
);
17211 case TLS_MODEL_INITIAL_EXEC
:
17212 fputs ("@ie", file
);
17214 /* Use global-dynamic for local-dynamic. */
17215 case TLS_MODEL_GLOBAL_DYNAMIC
:
17216 case TLS_MODEL_LOCAL_DYNAMIC
:
17218 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17219 fputs ("\t.tc .", file
);
17220 RS6000_OUTPUT_BASENAME (file
, name
);
17221 fputs ("[TC],", file
);
17222 output_addr_const (file
, x
);
17223 fputs ("@m", file
);
17226 gcc_unreachable ();
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
17301 /* Generate a unique section name for FILENAME for a section type
17302 represented by SECTION_DESC. Output goes into BUF.
17304 SECTION_DESC can be any string, as long as it is different for each
17305 possible section type.
17307 We name the section in the same manner as xlc. The name begins with an
17308 underscore followed by the filename (after stripping any leading directory
17309 names) with the last period replaced by the string SECTION_DESC. If
17310 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17314 rs6000_gen_section_name (char **buf
, const char *filename
,
17315 const char *section_desc
)
17317 const char *q
, *after_last_slash
, *last_period
= 0;
17321 after_last_slash
= filename
;
17322 for (q
= filename
; *q
; q
++)
17325 after_last_slash
= q
+ 1;
17326 else if (*q
== '.')
17330 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
17331 *buf
= (char *) xmalloc (len
);
17336 for (q
= after_last_slash
; *q
; q
++)
17338 if (q
== last_period
)
17340 strcpy (p
, section_desc
);
17341 p
+= strlen (section_desc
);
17345 else if (ISALNUM (*q
))
17349 if (last_period
== 0)
17350 strcpy (p
, section_desc
);
17355 /* Emit profile function. */
17358 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17360 /* Non-standard profiling for kernels, which just saves LR then calls
17361 _mcount without worrying about arg saves. The idea is to change
17362 the function prologue as little as possible as it isn't easy to
17363 account for arg save/restore code added just for _mcount. */
17364 if (TARGET_PROFILE_KERNEL
)
17367 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17369 #ifndef NO_PROFILE_COUNTERS
17370 # define NO_PROFILE_COUNTERS 0
17372 if (NO_PROFILE_COUNTERS
)
17373 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17374 LCT_NORMAL
, VOIDmode
);
17378 const char *label_name
;
17381 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17382 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
17383 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
17385 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17386 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
17389 else if (DEFAULT_ABI
== ABI_DARWIN
)
17391 const char *mcount_name
= RS6000_MCOUNT
;
17392 int caller_addr_regno
= LR_REGNO
;
17394 /* Be conservative and always set this, at least for now. */
17395 crtl
->uses_pic_offset_table
= 1;
17398 /* For PIC code, set up a stub and collect the caller's address
17399 from r0, which is where the prologue puts it. */
17400 if (MACHOPIC_INDIRECT
17401 && crtl
->uses_pic_offset_table
)
17402 caller_addr_regno
= 0;
17404 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
17405 LCT_NORMAL
, VOIDmode
,
17406 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
17410 /* Write function profiler code. */
17413 output_function_profiler (FILE *file
, int labelno
)
17417 switch (DEFAULT_ABI
)
17420 gcc_unreachable ();
17425 warning (0, "no profiling of 64-bit code for this ABI");
17428 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17429 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
17430 if (NO_PROFILE_COUNTERS
)
17432 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17433 reg_names
[0], reg_names
[1]);
17435 else if (TARGET_SECURE_PLT
&& flag_pic
)
17437 if (TARGET_LINK_STACK
)
17440 get_ppc476_thunk_name (name
);
17441 asm_fprintf (file
, "\tbl %s\n", name
);
17444 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
17445 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17446 reg_names
[0], reg_names
[1]);
17447 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17448 asm_fprintf (file
, "\taddis %s,%s,",
17449 reg_names
[12], reg_names
[12]);
17450 assemble_name (file
, buf
);
17451 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
17452 assemble_name (file
, buf
);
17453 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
17455 else if (flag_pic
== 1)
17457 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
17458 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17459 reg_names
[0], reg_names
[1]);
17460 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17461 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
17462 assemble_name (file
, buf
);
17463 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
17465 else if (flag_pic
> 1)
17467 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17468 reg_names
[0], reg_names
[1]);
17469 /* Now, we need to get the address of the label. */
17470 if (TARGET_LINK_STACK
)
17473 get_ppc476_thunk_name (name
);
17474 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
17475 assemble_name (file
, buf
);
17476 fputs ("-.\n1:", file
);
17477 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17478 asm_fprintf (file
, "\taddi %s,%s,4\n",
17479 reg_names
[11], reg_names
[11]);
17483 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
17484 assemble_name (file
, buf
);
17485 fputs ("-.\n1:", file
);
17486 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17488 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
17489 reg_names
[0], reg_names
[11]);
17490 asm_fprintf (file
, "\tadd %s,%s,%s\n",
17491 reg_names
[0], reg_names
[0], reg_names
[11]);
17495 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
17496 assemble_name (file
, buf
);
17497 fputs ("@ha\n", file
);
17498 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17499 reg_names
[0], reg_names
[1]);
17500 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
17501 assemble_name (file
, buf
);
17502 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
17505 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17506 fprintf (file
, "\tbl %s%s\n",
17507 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
17513 /* Don't do anything, done in output_profile_hook (). */
17520 /* The following variable value is the last issued insn. */
17522 static rtx_insn
*last_scheduled_insn
;
17524 /* The following variable helps to balance issuing of load and
17525 store instructions */
17527 static int load_store_pendulum
;
17529 /* The following variable helps pair divide insns during scheduling. */
17530 static int divide_cnt
;
17531 /* The following variable helps pair and alternate vector and vector load
17532 insns during scheduling. */
17533 static int vec_pairing
;
17536 /* Power4 load update and store update instructions are cracked into a
17537 load or store and an integer insn which are executed in the same cycle.
17538 Branches have their own dispatch slot which does not count against the
17539 GCC issue rate, but it changes the program flow so there are no other
17540 instructions to issue in this cycle. */
17543 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
17545 last_scheduled_insn
= insn
;
17546 if (GET_CODE (PATTERN (insn
)) == USE
17547 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17549 cached_can_issue_more
= more
;
17550 return cached_can_issue_more
;
17553 if (insn_terminates_group_p (insn
, current_group
))
17555 cached_can_issue_more
= 0;
17556 return cached_can_issue_more
;
17559 /* If no reservation, but reach here */
17560 if (recog_memoized (insn
) < 0)
17563 if (rs6000_sched_groups
)
17565 if (is_microcoded_insn (insn
))
17566 cached_can_issue_more
= 0;
17567 else if (is_cracked_insn (insn
))
17568 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
17570 cached_can_issue_more
= more
- 1;
17572 return cached_can_issue_more
;
17575 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
17578 cached_can_issue_more
= more
- 1;
17579 return cached_can_issue_more
;
17583 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
17585 int r
= rs6000_variable_issue_1 (insn
, more
);
17587 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
17591 /* Adjust the cost of a scheduling dependency. Return the new cost of
17592 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17595 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
17598 enum attr_type attr_type
;
17600 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
17607 /* Data dependency; DEP_INSN writes a register that INSN reads
17608 some cycles later. */
17610 /* Separate a load from a narrower, dependent store. */
17611 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
17612 || rs6000_tune
== PROCESSOR_POWER10
)
17613 && GET_CODE (PATTERN (insn
)) == SET
17614 && GET_CODE (PATTERN (dep_insn
)) == SET
17615 && MEM_P (XEXP (PATTERN (insn
), 1))
17616 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
17617 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
17618 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
17621 attr_type
= get_attr_type (insn
);
17626 /* Tell the first scheduling pass about the latency between
17627 a mtctr and bctr (and mtlr and br/blr). The first
17628 scheduling pass will not know about this latency since
17629 the mtctr instruction, which has the latency associated
17630 to it, will be generated by reload. */
17633 /* Leave some extra cycles between a compare and its
17634 dependent branch, to inhibit expensive mispredicts. */
17635 if ((rs6000_tune
== PROCESSOR_PPC603
17636 || rs6000_tune
== PROCESSOR_PPC604
17637 || rs6000_tune
== PROCESSOR_PPC604e
17638 || rs6000_tune
== PROCESSOR_PPC620
17639 || rs6000_tune
== PROCESSOR_PPC630
17640 || rs6000_tune
== PROCESSOR_PPC750
17641 || rs6000_tune
== PROCESSOR_PPC7400
17642 || rs6000_tune
== PROCESSOR_PPC7450
17643 || rs6000_tune
== PROCESSOR_PPCE5500
17644 || rs6000_tune
== PROCESSOR_PPCE6500
17645 || rs6000_tune
== PROCESSOR_POWER4
17646 || rs6000_tune
== PROCESSOR_POWER5
17647 || rs6000_tune
== PROCESSOR_POWER7
17648 || rs6000_tune
== PROCESSOR_POWER8
17649 || rs6000_tune
== PROCESSOR_POWER9
17650 || rs6000_tune
== PROCESSOR_POWER10
17651 || rs6000_tune
== PROCESSOR_CELL
)
17652 && recog_memoized (dep_insn
)
17653 && (INSN_CODE (dep_insn
) >= 0))
17655 switch (get_attr_type (dep_insn
))
17658 case TYPE_FPCOMPARE
:
17659 case TYPE_CR_LOGICAL
:
17663 if (get_attr_dot (dep_insn
) == DOT_YES
)
17668 if (get_attr_dot (dep_insn
) == DOT_YES
17669 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
17680 if ((rs6000_tune
== PROCESSOR_POWER6
)
17681 && recog_memoized (dep_insn
)
17682 && (INSN_CODE (dep_insn
) >= 0))
17685 if (GET_CODE (PATTERN (insn
)) != SET
)
17686 /* If this happens, we have to extend this to schedule
17687 optimally. Return default for now. */
17690 /* Adjust the cost for the case where the value written
17691 by a fixed point operation is used as the address
17692 gen value on a store. */
17693 switch (get_attr_type (dep_insn
))
17698 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17699 return get_attr_sign_extend (dep_insn
)
17700 == SIGN_EXTEND_YES
? 6 : 4;
17705 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17706 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
17716 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17724 if (get_attr_update (dep_insn
) == UPDATE_YES
17725 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
17731 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17737 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17738 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17748 if ((rs6000_tune
== PROCESSOR_POWER6
)
17749 && recog_memoized (dep_insn
)
17750 && (INSN_CODE (dep_insn
) >= 0))
17753 /* Adjust the cost for the case where the value written
17754 by a fixed point instruction is used within the address
17755 gen portion of a subsequent load(u)(x) */
17756 switch (get_attr_type (dep_insn
))
17761 if (set_to_load_agen (dep_insn
, insn
))
17762 return get_attr_sign_extend (dep_insn
)
17763 == SIGN_EXTEND_YES
? 6 : 4;
17768 if (set_to_load_agen (dep_insn
, insn
))
17769 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
17779 if (set_to_load_agen (dep_insn
, insn
))
17787 if (get_attr_update (dep_insn
) == UPDATE_YES
17788 && set_to_load_agen (dep_insn
, insn
))
17794 if (set_to_load_agen (dep_insn
, insn
))
17800 if (set_to_load_agen (dep_insn
, insn
))
17801 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17814 /* Fall out to return default cost. */
17818 case REG_DEP_OUTPUT
:
17819 /* Output dependency; DEP_INSN writes a register that INSN writes some
17821 if ((rs6000_tune
== PROCESSOR_POWER6
)
17822 && recog_memoized (dep_insn
)
17823 && (INSN_CODE (dep_insn
) >= 0))
17825 attr_type
= get_attr_type (insn
);
17830 case TYPE_FPSIMPLE
:
17831 if (get_attr_type (dep_insn
) == TYPE_FP
17832 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
17839 /* Fall through, no cost for output dependency. */
17843 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17848 gcc_unreachable ();
17854 /* Debug version of rs6000_adjust_cost. */
17857 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
17858 int cost
, unsigned int dw
)
17860 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
17868 default: dep
= "unknown depencency"; break;
17869 case REG_DEP_TRUE
: dep
= "data dependency"; break;
17870 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
17871 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
17875 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17876 "%s, insn:\n", ret
, cost
, dep
);
17884 /* The function returns a true if INSN is microcoded.
17885 Return false otherwise. */
17888 is_microcoded_insn (rtx_insn
*insn
)
17890 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17891 || GET_CODE (PATTERN (insn
)) == USE
17892 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17895 if (rs6000_tune
== PROCESSOR_CELL
)
17896 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
17898 if (rs6000_sched_groups
17899 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
17901 enum attr_type type
= get_attr_type (insn
);
17902 if ((type
== TYPE_LOAD
17903 && get_attr_update (insn
) == UPDATE_YES
17904 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
17905 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
17906 && get_attr_update (insn
) == UPDATE_YES
17907 && get_attr_indexed (insn
) == INDEXED_YES
)
17908 || type
== TYPE_MFCR
)
17915 /* The function returns true if INSN is cracked into 2 instructions
17916 by the processor (and therefore occupies 2 issue slots). */
17919 is_cracked_insn (rtx_insn
*insn
)
17921 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17922 || GET_CODE (PATTERN (insn
)) == USE
17923 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17926 if (rs6000_sched_groups
17927 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
17929 enum attr_type type
= get_attr_type (insn
);
17930 if ((type
== TYPE_LOAD
17931 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
17932 && get_attr_update (insn
) == UPDATE_NO
)
17933 || (type
== TYPE_LOAD
17934 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
17935 && get_attr_update (insn
) == UPDATE_YES
17936 && get_attr_indexed (insn
) == INDEXED_NO
)
17937 || (type
== TYPE_STORE
17938 && get_attr_update (insn
) == UPDATE_YES
17939 && get_attr_indexed (insn
) == INDEXED_NO
)
17940 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
17941 && get_attr_update (insn
) == UPDATE_YES
)
17942 || (type
== TYPE_CR_LOGICAL
17943 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
17944 || (type
== TYPE_EXTS
17945 && get_attr_dot (insn
) == DOT_YES
)
17946 || (type
== TYPE_SHIFT
17947 && get_attr_dot (insn
) == DOT_YES
17948 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
17949 || (type
== TYPE_MUL
17950 && get_attr_dot (insn
) == DOT_YES
)
17951 || type
== TYPE_DIV
17952 || (type
== TYPE_INSERT
17953 && get_attr_size (insn
) == SIZE_32
))
17960 /* The function returns true if INSN can be issued only from
17961 the branch slot. */
17964 is_branch_slot_insn (rtx_insn
*insn
)
17966 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17967 || GET_CODE (PATTERN (insn
)) == USE
17968 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17971 if (rs6000_sched_groups
)
17973 enum attr_type type
= get_attr_type (insn
);
17974 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
17982 /* The function returns true if out_inst sets a value that is
17983 used in the address generation computation of in_insn */
17985 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
17987 rtx out_set
, in_set
;
17989 /* For performance reasons, only handle the simple case where
17990 both loads are a single_set. */
17991 out_set
= single_set (out_insn
);
17994 in_set
= single_set (in_insn
);
17996 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
18002 /* Try to determine base/offset/size parts of the given MEM.
18003 Return true if successful, false if all the values couldn't
18006 This function only looks for REG or REG+CONST address forms.
18007 REG+REG address form will return false. */
18010 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
18011 HOST_WIDE_INT
*size
)
18014 if MEM_SIZE_KNOWN_P (mem
)
18015 *size
= MEM_SIZE (mem
);
18019 addr_rtx
= (XEXP (mem
, 0));
18020 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
18021 addr_rtx
= XEXP (addr_rtx
, 1);
18024 while (GET_CODE (addr_rtx
) == PLUS
18025 && CONST_INT_P (XEXP (addr_rtx
, 1)))
18027 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
18028 addr_rtx
= XEXP (addr_rtx
, 0);
18030 if (!REG_P (addr_rtx
))
18037 /* The function returns true if the target storage location of
18038 mem1 is adjacent to the target storage location of mem2 */
18039 /* Return 1 if memory locations are adjacent. */
18042 adjacent_mem_locations (rtx mem1
, rtx mem2
)
18045 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18047 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18048 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18049 return ((REGNO (reg1
) == REGNO (reg2
))
18050 && ((off1
+ size1
== off2
)
18051 || (off2
+ size2
== off1
)));
18056 /* This function returns true if it can be determined that the two MEM
18057 locations overlap by at least 1 byte based on base reg/offset/size. */
18060 mem_locations_overlap (rtx mem1
, rtx mem2
)
18063 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18065 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18066 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18067 return ((REGNO (reg1
) == REGNO (reg2
))
18068 && (((off1
<= off2
) && (off1
+ size1
> off2
))
18069 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
18074 /* A C statement (sans semicolon) to update the integer scheduling
18075 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18076 INSN earlier, reduce the priority to execute INSN later. Do not
18077 define this macro if you do not need to adjust the scheduling
18078 priorities of insns. */
18081 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18083 rtx load_mem
, str_mem
;
18084 /* On machines (like the 750) which have asymmetric integer units,
18085 where one integer unit can do multiply and divides and the other
18086 can't, reduce the priority of multiply/divide so it is scheduled
18087 before other integer operations. */
18090 if (! INSN_P (insn
))
18093 if (GET_CODE (PATTERN (insn
)) == USE
)
18096 switch (rs6000_tune
) {
18097 case PROCESSOR_PPC750
:
18098 switch (get_attr_type (insn
))
18105 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18106 priority
, priority
);
18107 if (priority
>= 0 && priority
< 0x01000000)
18114 if (insn_must_be_first_in_group (insn
)
18115 && reload_completed
18116 && current_sched_info
->sched_max_insns_priority
18117 && rs6000_sched_restricted_insns_priority
)
18120 /* Prioritize insns that can be dispatched only in the first
18122 if (rs6000_sched_restricted_insns_priority
== 1)
18123 /* Attach highest priority to insn. This means that in
18124 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
18125 precede 'priority' (critical path) considerations. */
18126 return current_sched_info
->sched_max_insns_priority
;
18127 else if (rs6000_sched_restricted_insns_priority
== 2)
18128 /* Increase priority of insn by a minimal amount. This means that in
18129 haifa-sched.c:ready_sort(), only 'priority' (critical path)
18130 considerations precede dispatch-slot restriction considerations. */
18131 return (priority
+ 1);
18134 if (rs6000_tune
== PROCESSOR_POWER6
18135 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18136 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18137 /* Attach highest priority to insn if the scheduler has just issued two
18138 stores and this instruction is a load, or two loads and this instruction
18139 is a store. Power6 wants loads and stores scheduled alternately
18141 return current_sched_info
->sched_max_insns_priority
;
18146 /* Return true if the instruction is nonpipelined on the Cell. */
18148 is_nonpipeline_insn (rtx_insn
*insn
)
18150 enum attr_type type
;
18151 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18152 || GET_CODE (PATTERN (insn
)) == USE
18153 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18156 type
= get_attr_type (insn
);
18157 if (type
== TYPE_MUL
18158 || type
== TYPE_DIV
18159 || type
== TYPE_SDIV
18160 || type
== TYPE_DDIV
18161 || type
== TYPE_SSQRT
18162 || type
== TYPE_DSQRT
18163 || type
== TYPE_MFCR
18164 || type
== TYPE_MFCRF
18165 || type
== TYPE_MFJMPR
)
18173 /* Return how many instructions the machine can issue per cycle. */
18176 rs6000_issue_rate (void)
18178 /* Unless scheduling for register pressure, use issue rate of 1 for
18179 first scheduling pass to decrease degradation. */
18180 if (!reload_completed
&& !flag_sched_pressure
)
18183 switch (rs6000_tune
) {
18184 case PROCESSOR_RS64A
:
18185 case PROCESSOR_PPC601
: /* ? */
18186 case PROCESSOR_PPC7450
:
18188 case PROCESSOR_PPC440
:
18189 case PROCESSOR_PPC603
:
18190 case PROCESSOR_PPC750
:
18191 case PROCESSOR_PPC7400
:
18192 case PROCESSOR_PPC8540
:
18193 case PROCESSOR_PPC8548
:
18194 case PROCESSOR_CELL
:
18195 case PROCESSOR_PPCE300C2
:
18196 case PROCESSOR_PPCE300C3
:
18197 case PROCESSOR_PPCE500MC
:
18198 case PROCESSOR_PPCE500MC64
:
18199 case PROCESSOR_PPCE5500
:
18200 case PROCESSOR_PPCE6500
:
18201 case PROCESSOR_TITAN
:
18203 case PROCESSOR_PPC476
:
18204 case PROCESSOR_PPC604
:
18205 case PROCESSOR_PPC604e
:
18206 case PROCESSOR_PPC620
:
18207 case PROCESSOR_PPC630
:
18209 case PROCESSOR_POWER4
:
18210 case PROCESSOR_POWER5
:
18211 case PROCESSOR_POWER6
:
18212 case PROCESSOR_POWER7
:
18214 case PROCESSOR_POWER8
:
18216 case PROCESSOR_POWER9
:
18217 case PROCESSOR_POWER10
:
18224 /* Return how many instructions to look ahead for better insn
18228 rs6000_use_sched_lookahead (void)
18230 switch (rs6000_tune
)
18232 case PROCESSOR_PPC8540
:
18233 case PROCESSOR_PPC8548
:
18236 case PROCESSOR_CELL
:
18237 return (reload_completed
? 8 : 0);
18244 /* We are choosing insn from the ready queue. Return zero if INSN can be
18247 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18249 if (ready_index
== 0)
18252 if (rs6000_tune
!= PROCESSOR_CELL
)
18255 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18257 if (!reload_completed
18258 || is_nonpipeline_insn (insn
)
18259 || is_microcoded_insn (insn
))
18265 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18266 and return true. */
18269 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18274 /* stack_tie does not produce any real memory traffic. */
18275 if (tie_operand (pat
, VOIDmode
))
18284 /* Recursively process the pattern. */
18285 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18287 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18291 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18294 else if (fmt
[i
] == 'E')
18295 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18297 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
18305 /* Determine if PAT is a PATTERN of a load insn. */
18308 is_load_insn1 (rtx pat
, rtx
*load_mem
)
18310 if (!pat
|| pat
== NULL_RTX
)
18313 if (GET_CODE (pat
) == SET
)
18314 return find_mem_ref (SET_SRC (pat
), load_mem
);
18316 if (GET_CODE (pat
) == PARALLEL
)
18320 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18321 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
18328 /* Determine if INSN loads from memory. */
18331 is_load_insn (rtx insn
, rtx
*load_mem
)
18333 if (!insn
|| !INSN_P (insn
))
18339 return is_load_insn1 (PATTERN (insn
), load_mem
);
18342 /* Determine if PAT is a PATTERN of a store insn. */
18345 is_store_insn1 (rtx pat
, rtx
*str_mem
)
18347 if (!pat
|| pat
== NULL_RTX
)
18350 if (GET_CODE (pat
) == SET
)
18351 return find_mem_ref (SET_DEST (pat
), str_mem
);
18353 if (GET_CODE (pat
) == PARALLEL
)
18357 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18358 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
18365 /* Determine if INSN stores to memory. */
18368 is_store_insn (rtx insn
, rtx
*str_mem
)
18370 if (!insn
|| !INSN_P (insn
))
18373 return is_store_insn1 (PATTERN (insn
), str_mem
);
18376 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18379 is_power9_pairable_vec_type (enum attr_type type
)
18383 case TYPE_VECSIMPLE
:
18384 case TYPE_VECCOMPLEX
:
18388 case TYPE_VECFLOAT
:
18390 case TYPE_VECDOUBLE
:
18398 /* Returns whether the dependence between INSN and NEXT is considered
18399 costly by the given target. */
18402 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
18406 rtx load_mem
, str_mem
;
18408 /* If the flag is not enabled - no dependence is considered costly;
18409 allow all dependent insns in the same group.
18410 This is the most aggressive option. */
18411 if (rs6000_sched_costly_dep
== no_dep_costly
)
18414 /* If the flag is set to 1 - a dependence is always considered costly;
18415 do not allow dependent instructions in the same group.
18416 This is the most conservative option. */
18417 if (rs6000_sched_costly_dep
== all_deps_costly
)
18420 insn
= DEP_PRO (dep
);
18421 next
= DEP_CON (dep
);
18423 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
18424 && is_load_insn (next
, &load_mem
)
18425 && is_store_insn (insn
, &str_mem
))
18426 /* Prevent load after store in the same group. */
18429 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
18430 && is_load_insn (next
, &load_mem
)
18431 && is_store_insn (insn
, &str_mem
)
18432 && DEP_TYPE (dep
) == REG_DEP_TRUE
18433 && mem_locations_overlap(str_mem
, load_mem
))
18434 /* Prevent load after store in the same group if it is a true
18438 /* The flag is set to X; dependences with latency >= X are considered costly,
18439 and will not be scheduled in the same group. */
18440 if (rs6000_sched_costly_dep
<= max_dep_latency
18441 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
18447 /* Return the next insn after INSN that is found before TAIL is reached,
18448 skipping any "non-active" insns - insns that will not actually occupy
18449 an issue slot. Return NULL_RTX if such an insn is not found. */
18452 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
18454 if (insn
== NULL_RTX
|| insn
== tail
)
18459 insn
= NEXT_INSN (insn
);
18460 if (insn
== NULL_RTX
|| insn
== tail
)
18464 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
18465 || (NONJUMP_INSN_P (insn
)
18466 && GET_CODE (PATTERN (insn
)) != USE
18467 && GET_CODE (PATTERN (insn
)) != CLOBBER
18468 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
18474 /* Move instruction at POS to the end of the READY list. */
18477 move_to_end_of_ready (rtx_insn
**ready
, int pos
, int lastpos
)
18483 for (i
= pos
; i
< lastpos
; i
++)
18484 ready
[i
] = ready
[i
+ 1];
18485 ready
[lastpos
] = tmp
;
18488 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18491 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
18493 /* For Power6, we need to handle some special cases to try and keep the
18494 store queue from overflowing and triggering expensive flushes.
18496 This code monitors how load and store instructions are being issued
18497 and skews the ready list one way or the other to increase the likelihood
18498 that a desired instruction is issued at the proper time.
18500 A couple of things are done. First, we maintain a "load_store_pendulum"
18501 to track the current state of load/store issue.
18503 - If the pendulum is at zero, then no loads or stores have been
18504 issued in the current cycle so we do nothing.
18506 - If the pendulum is 1, then a single load has been issued in this
18507 cycle and we attempt to locate another load in the ready list to
18510 - If the pendulum is -2, then two stores have already been
18511 issued in this cycle, so we increase the priority of the first load
18512 in the ready list to increase it's likelihood of being chosen first
18515 - If the pendulum is -1, then a single store has been issued in this
18516 cycle and we attempt to locate another store in the ready list to
18517 issue with it, preferring a store to an adjacent memory location to
18518 facilitate store pairing in the store queue.
18520 - If the pendulum is 2, then two loads have already been
18521 issued in this cycle, so we increase the priority of the first store
18522 in the ready list to increase it's likelihood of being chosen first
18525 - If the pendulum < -2 or > 2, then do nothing.
18527 Note: This code covers the most common scenarios. There exist non
18528 load/store instructions which make use of the LSU and which
18529 would need to be accounted for to strictly model the behavior
18530 of the machine. Those instructions are currently unaccounted
18531 for to help minimize compile time overhead of this code.
18534 rtx load_mem
, str_mem
;
18536 if (is_store_insn (last_scheduled_insn
, &str_mem
))
18537 /* Issuing a store, swing the load_store_pendulum to the left */
18538 load_store_pendulum
--;
18539 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
18540 /* Issuing a load, swing the load_store_pendulum to the right */
18541 load_store_pendulum
++;
18543 return cached_can_issue_more
;
18545 /* If the pendulum is balanced, or there is only one instruction on
18546 the ready list, then all is well, so return. */
18547 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
18548 return cached_can_issue_more
;
18550 if (load_store_pendulum
== 1)
18552 /* A load has been issued in this cycle. Scan the ready list
18553 for another load to issue with it */
18558 if (is_load_insn (ready
[pos
], &load_mem
))
18560 /* Found a load. Move it to the head of the ready list,
18561 and adjust it's priority so that it is more likely to
18563 move_to_end_of_ready (ready
, pos
, lastpos
);
18565 if (!sel_sched_p ()
18566 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18567 INSN_PRIORITY (ready
[lastpos
])++;
18573 else if (load_store_pendulum
== -2)
18575 /* Two stores have been issued in this cycle. Increase the
18576 priority of the first load in the ready list to favor it for
18577 issuing in the next cycle. */
18582 if (is_load_insn (ready
[pos
], &load_mem
)
18584 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18586 INSN_PRIORITY (ready
[pos
])++;
18588 /* Adjust the pendulum to account for the fact that a load
18589 was found and increased in priority. This is to prevent
18590 increasing the priority of multiple loads */
18591 load_store_pendulum
--;
18598 else if (load_store_pendulum
== -1)
18600 /* A store has been issued in this cycle. Scan the ready list for
18601 another store to issue with it, preferring a store to an adjacent
18603 int first_store_pos
= -1;
18609 if (is_store_insn (ready
[pos
], &str_mem
))
18612 /* Maintain the index of the first store found on the
18614 if (first_store_pos
== -1)
18615 first_store_pos
= pos
;
18617 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
18618 && adjacent_mem_locations (str_mem
, str_mem2
))
18620 /* Found an adjacent store. Move it to the head of the
18621 ready list, and adjust it's priority so that it is
18622 more likely to stay there */
18623 move_to_end_of_ready (ready
, pos
, lastpos
);
18625 if (!sel_sched_p ()
18626 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18627 INSN_PRIORITY (ready
[lastpos
])++;
18629 first_store_pos
= -1;
18637 if (first_store_pos
>= 0)
18639 /* An adjacent store wasn't found, but a non-adjacent store was,
18640 so move the non-adjacent store to the front of the ready
18641 list, and adjust its priority so that it is more likely to
18643 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
18644 if (!sel_sched_p ()
18645 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18646 INSN_PRIORITY (ready
[lastpos
])++;
18649 else if (load_store_pendulum
== 2)
18651 /* Two loads have been issued in this cycle. Increase the priority
18652 of the first store in the ready list to favor it for issuing in
18658 if (is_store_insn (ready
[pos
], &str_mem
)
18660 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18662 INSN_PRIORITY (ready
[pos
])++;
18664 /* Adjust the pendulum to account for the fact that a store
18665 was found and increased in priority. This is to prevent
18666 increasing the priority of multiple stores */
18667 load_store_pendulum
++;
18675 return cached_can_issue_more
;
18678 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18681 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
18684 enum attr_type type
, type2
;
18686 type
= get_attr_type (last_scheduled_insn
);
18688 /* Try to issue fixed point divides back-to-back in pairs so they will be
18689 routed to separate execution units and execute in parallel. */
18690 if (type
== TYPE_DIV
&& divide_cnt
== 0)
18692 /* First divide has been scheduled. */
18695 /* Scan the ready list looking for another divide, if found move it
18696 to the end of the list so it is chosen next. */
18700 if (recog_memoized (ready
[pos
]) >= 0
18701 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
18703 move_to_end_of_ready (ready
, pos
, lastpos
);
18711 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18714 /* The best dispatch throughput for vector and vector load insns can be
18715 achieved by interleaving a vector and vector load such that they'll
18716 dispatch to the same superslice. If this pairing cannot be achieved
18717 then it is best to pair vector insns together and vector load insns
18720 To aid in this pairing, vec_pairing maintains the current state with
18721 the following values:
18723 0 : Initial state, no vecload/vector pairing has been started.
18725 1 : A vecload or vector insn has been issued and a candidate for
18726 pairing has been found and moved to the end of the ready
18728 if (type
== TYPE_VECLOAD
)
18730 /* Issued a vecload. */
18731 if (vec_pairing
== 0)
18733 int vecload_pos
= -1;
18734 /* We issued a single vecload, look for a vector insn to pair it
18735 with. If one isn't found, try to pair another vecload. */
18739 if (recog_memoized (ready
[pos
]) >= 0)
18741 type2
= get_attr_type (ready
[pos
]);
18742 if (is_power9_pairable_vec_type (type2
))
18744 /* Found a vector insn to pair with, move it to the
18745 end of the ready list so it is scheduled next. */
18746 move_to_end_of_ready (ready
, pos
, lastpos
);
18748 return cached_can_issue_more
;
18750 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
18751 /* Remember position of first vecload seen. */
18756 if (vecload_pos
>= 0)
18758 /* Didn't find a vector to pair with but did find a vecload,
18759 move it to the end of the ready list. */
18760 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
18762 return cached_can_issue_more
;
18766 else if (is_power9_pairable_vec_type (type
))
18768 /* Issued a vector operation. */
18769 if (vec_pairing
== 0)
18772 /* We issued a single vector insn, look for a vecload to pair it
18773 with. If one isn't found, try to pair another vector. */
18777 if (recog_memoized (ready
[pos
]) >= 0)
18779 type2
= get_attr_type (ready
[pos
]);
18780 if (type2
== TYPE_VECLOAD
)
18782 /* Found a vecload insn to pair with, move it to the
18783 end of the ready list so it is scheduled next. */
18784 move_to_end_of_ready (ready
, pos
, lastpos
);
18786 return cached_can_issue_more
;
18788 else if (is_power9_pairable_vec_type (type2
)
18790 /* Remember position of first vector insn seen. */
18797 /* Didn't find a vecload to pair with but did find a vector
18798 insn, move it to the end of the ready list. */
18799 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
18801 return cached_can_issue_more
;
18806 /* We've either finished a vec/vecload pair, couldn't find an insn to
18807 continue the current pair, or the last insn had nothing to do with
18808 with pairing. In any case, reset the state. */
18812 return cached_can_issue_more
;
18815 /* We are about to begin issuing insns for this clock cycle. */
18818 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
18819 rtx_insn
**ready ATTRIBUTE_UNUSED
,
18820 int *pn_ready ATTRIBUTE_UNUSED
,
18821 int clock_var ATTRIBUTE_UNUSED
)
18823 int n_ready
= *pn_ready
;
18826 fprintf (dump
, "// rs6000_sched_reorder :\n");
18828 /* Reorder the ready list, if the second to last ready insn
18829 is a nonepipeline insn. */
18830 if (rs6000_tune
== PROCESSOR_CELL
&& n_ready
> 1)
18832 if (is_nonpipeline_insn (ready
[n_ready
- 1])
18833 && (recog_memoized (ready
[n_ready
- 2]) > 0))
18834 /* Simply swap first two insns. */
18835 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
18838 if (rs6000_tune
== PROCESSOR_POWER6
)
18839 load_store_pendulum
= 0;
18841 return rs6000_issue_rate ();
18844 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18847 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
18848 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
18851 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
18853 /* Do Power6 dependent reordering if necessary. */
18854 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
18855 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
18857 /* Do Power9 dependent reordering if necessary. */
18858 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
18859 && recog_memoized (last_scheduled_insn
) >= 0)
18860 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
18862 return cached_can_issue_more
;
18865 /* Return whether the presence of INSN causes a dispatch group termination
18866 of group WHICH_GROUP.
18868 If WHICH_GROUP == current_group, this function will return true if INSN
18869 causes the termination of the current group (i.e, the dispatch group to
18870 which INSN belongs). This means that INSN will be the last insn in the
18871 group it belongs to.
18873 If WHICH_GROUP == previous_group, this function will return true if INSN
18874 causes the termination of the previous group (i.e, the dispatch group that
18875 precedes the group to which INSN belongs). This means that INSN will be
18876 the first insn in the group it belongs to). */
18879 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
18886 first
= insn_must_be_first_in_group (insn
);
18887 last
= insn_must_be_last_in_group (insn
);
18892 if (which_group
== current_group
)
18894 else if (which_group
== previous_group
)
18902 insn_must_be_first_in_group (rtx_insn
*insn
)
18904 enum attr_type type
;
18908 || DEBUG_INSN_P (insn
)
18909 || GET_CODE (PATTERN (insn
)) == USE
18910 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18913 switch (rs6000_tune
)
18915 case PROCESSOR_POWER5
:
18916 if (is_cracked_insn (insn
))
18919 case PROCESSOR_POWER4
:
18920 if (is_microcoded_insn (insn
))
18923 if (!rs6000_sched_groups
)
18926 type
= get_attr_type (insn
);
18933 case TYPE_CR_LOGICAL
:
18946 case PROCESSOR_POWER6
:
18947 type
= get_attr_type (insn
);
18956 case TYPE_FPCOMPARE
:
18967 if (get_attr_dot (insn
) == DOT_NO
18968 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18973 if (get_attr_size (insn
) == SIZE_32
)
18981 if (get_attr_update (insn
) == UPDATE_YES
)
18989 case PROCESSOR_POWER7
:
18990 type
= get_attr_type (insn
);
18994 case TYPE_CR_LOGICAL
:
19008 if (get_attr_dot (insn
) == DOT_YES
)
19013 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19014 || get_attr_update (insn
) == UPDATE_YES
)
19021 if (get_attr_update (insn
) == UPDATE_YES
)
19029 case PROCESSOR_POWER8
:
19030 type
= get_attr_type (insn
);
19034 case TYPE_CR_LOGICAL
:
19042 case TYPE_VECSTORE
:
19049 if (get_attr_dot (insn
) == DOT_YES
)
19054 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19055 || get_attr_update (insn
) == UPDATE_YES
)
19060 if (get_attr_update (insn
) == UPDATE_YES
19061 && get_attr_indexed (insn
) == INDEXED_YES
)
19077 insn_must_be_last_in_group (rtx_insn
*insn
)
19079 enum attr_type type
;
19083 || DEBUG_INSN_P (insn
)
19084 || GET_CODE (PATTERN (insn
)) == USE
19085 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19088 switch (rs6000_tune
) {
19089 case PROCESSOR_POWER4
:
19090 case PROCESSOR_POWER5
:
19091 if (is_microcoded_insn (insn
))
19094 if (is_branch_slot_insn (insn
))
19098 case PROCESSOR_POWER6
:
19099 type
= get_attr_type (insn
);
19107 case TYPE_FPCOMPARE
:
19118 if (get_attr_dot (insn
) == DOT_NO
19119 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19124 if (get_attr_size (insn
) == SIZE_32
)
19132 case PROCESSOR_POWER7
:
19133 type
= get_attr_type (insn
);
19143 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19144 && get_attr_update (insn
) == UPDATE_YES
)
19149 if (get_attr_update (insn
) == UPDATE_YES
19150 && get_attr_indexed (insn
) == INDEXED_YES
)
19158 case PROCESSOR_POWER8
:
19159 type
= get_attr_type (insn
);
19171 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19172 && get_attr_update (insn
) == UPDATE_YES
)
19177 if (get_attr_update (insn
) == UPDATE_YES
19178 && get_attr_indexed (insn
) == INDEXED_YES
)
19193 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19194 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19197 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19200 int issue_rate
= rs6000_issue_rate ();
19202 for (i
= 0; i
< issue_rate
; i
++)
19204 sd_iterator_def sd_it
;
19206 rtx insn
= group_insns
[i
];
19211 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19213 rtx next
= DEP_CON (dep
);
19215 if (next
== next_insn
19216 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19224 /* Utility of the function redefine_groups.
19225 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19226 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19227 to keep it "far" (in a separate group) from GROUP_INSNS, following
19228 one of the following schemes, depending on the value of the flag
19229 -minsert_sched_nops = X:
19230 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19231 in order to force NEXT_INSN into a separate group.
19232 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19233 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19234 insertion (has a group just ended, how many vacant issue slots remain in the
19235 last group, and how many dispatch groups were encountered so far). */
19238 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19239 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19244 int issue_rate
= rs6000_issue_rate ();
19245 bool end
= *group_end
;
19248 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19249 return can_issue_more
;
19251 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19252 return can_issue_more
;
19254 force
= is_costly_group (group_insns
, next_insn
);
19256 return can_issue_more
;
19258 if (sched_verbose
> 6)
19259 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19260 *group_count
,can_issue_more
);
19262 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19265 can_issue_more
= 0;
19267 /* Since only a branch can be issued in the last issue_slot, it is
19268 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19269 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19270 in this case the last nop will start a new group and the branch
19271 will be forced to the new group. */
19272 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
19275 /* Do we have a special group ending nop? */
19276 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
19277 || rs6000_tune
== PROCESSOR_POWER8
)
19279 nop
= gen_group_ending_nop ();
19280 emit_insn_before (nop
, next_insn
);
19281 can_issue_more
= 0;
19284 while (can_issue_more
> 0)
19287 emit_insn_before (nop
, next_insn
);
19295 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
19297 int n_nops
= rs6000_sched_insert_nops
;
19299 /* Nops can't be issued from the branch slot, so the effective
19300 issue_rate for nops is 'issue_rate - 1'. */
19301 if (can_issue_more
== 0)
19302 can_issue_more
= issue_rate
;
19304 if (can_issue_more
== 0)
19306 can_issue_more
= issue_rate
- 1;
19309 for (i
= 0; i
< issue_rate
; i
++)
19311 group_insns
[i
] = 0;
19318 emit_insn_before (nop
, next_insn
);
19319 if (can_issue_more
== issue_rate
- 1) /* new group begins */
19322 if (can_issue_more
== 0)
19324 can_issue_more
= issue_rate
- 1;
19327 for (i
= 0; i
< issue_rate
; i
++)
19329 group_insns
[i
] = 0;
19335 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19338 /* Is next_insn going to start a new group? */
19341 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19342 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19343 || (can_issue_more
< issue_rate
&&
19344 insn_terminates_group_p (next_insn
, previous_group
)));
19345 if (*group_end
&& end
)
19348 if (sched_verbose
> 6)
19349 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
19350 *group_count
, can_issue_more
);
19351 return can_issue_more
;
19354 return can_issue_more
;
19357 /* This function tries to synch the dispatch groups that the compiler "sees"
19358 with the dispatch groups that the processor dispatcher is expected to
19359 form in practice. It tries to achieve this synchronization by forcing the
19360 estimated processor grouping on the compiler (as opposed to the function
19361 'pad_goups' which tries to force the scheduler's grouping on the processor).
19363 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19364 examines the (estimated) dispatch groups that will be formed by the processor
19365 dispatcher. It marks these group boundaries to reflect the estimated
19366 processor grouping, overriding the grouping that the scheduler had marked.
19367 Depending on the value of the flag '-minsert-sched-nops' this function can
19368 force certain insns into separate groups or force a certain distance between
19369 them by inserting nops, for example, if there exists a "costly dependence"
19372 The function estimates the group boundaries that the processor will form as
19373 follows: It keeps track of how many vacant issue slots are available after
19374 each insn. A subsequent insn will start a new group if one of the following
19376 - no more vacant issue slots remain in the current dispatch group.
19377 - only the last issue slot, which is the branch slot, is vacant, but the next
19378 insn is not a branch.
19379 - only the last 2 or less issue slots, including the branch slot, are vacant,
19380 which means that a cracked insn (which occupies two issue slots) can't be
19381 issued in this group.
19382 - less than 'issue_rate' slots are vacant, and the next insn always needs to
19383 start a new group. */
19386 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19389 rtx_insn
*insn
, *next_insn
;
19391 int can_issue_more
;
19394 int group_count
= 0;
19398 issue_rate
= rs6000_issue_rate ();
19399 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
19400 for (i
= 0; i
< issue_rate
; i
++)
19402 group_insns
[i
] = 0;
19404 can_issue_more
= issue_rate
;
19406 insn
= get_next_active_insn (prev_head_insn
, tail
);
19409 while (insn
!= NULL_RTX
)
19411 slot
= (issue_rate
- can_issue_more
);
19412 group_insns
[slot
] = insn
;
19414 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19415 if (insn_terminates_group_p (insn
, current_group
))
19416 can_issue_more
= 0;
19418 next_insn
= get_next_active_insn (insn
, tail
);
19419 if (next_insn
== NULL_RTX
)
19420 return group_count
+ 1;
19422 /* Is next_insn going to start a new group? */
19424 = (can_issue_more
== 0
19425 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19426 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19427 || (can_issue_more
< issue_rate
&&
19428 insn_terminates_group_p (next_insn
, previous_group
)));
19430 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
19431 next_insn
, &group_end
, can_issue_more
,
19437 can_issue_more
= 0;
19438 for (i
= 0; i
< issue_rate
; i
++)
19440 group_insns
[i
] = 0;
19444 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
19445 PUT_MODE (next_insn
, VOIDmode
);
19446 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
19447 PUT_MODE (next_insn
, TImode
);
19450 if (can_issue_more
== 0)
19451 can_issue_more
= issue_rate
;
19454 return group_count
;
19457 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19458 dispatch group boundaries that the scheduler had marked. Pad with nops
19459 any dispatch groups which have vacant issue slots, in order to force the
19460 scheduler's grouping on the processor dispatcher. The function
19461 returns the number of dispatch groups found. */
19464 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19467 rtx_insn
*insn
, *next_insn
;
19470 int can_issue_more
;
19472 int group_count
= 0;
19474 /* Initialize issue_rate. */
19475 issue_rate
= rs6000_issue_rate ();
19476 can_issue_more
= issue_rate
;
19478 insn
= get_next_active_insn (prev_head_insn
, tail
);
19479 next_insn
= get_next_active_insn (insn
, tail
);
19481 while (insn
!= NULL_RTX
)
19484 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19486 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
19488 if (next_insn
== NULL_RTX
)
19493 /* If the scheduler had marked group termination at this location
19494 (between insn and next_insn), and neither insn nor next_insn will
19495 force group termination, pad the group with nops to force group
19498 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
19499 && !insn_terminates_group_p (insn
, current_group
)
19500 && !insn_terminates_group_p (next_insn
, previous_group
))
19502 if (!is_branch_slot_insn (next_insn
))
19505 while (can_issue_more
)
19508 emit_insn_before (nop
, next_insn
);
19513 can_issue_more
= issue_rate
;
19518 next_insn
= get_next_active_insn (insn
, tail
);
19521 return group_count
;
19524 /* We're beginning a new block. Initialize data structures as necessary. */
19527 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
19528 int sched_verbose ATTRIBUTE_UNUSED
,
19529 int max_ready ATTRIBUTE_UNUSED
)
19531 last_scheduled_insn
= NULL
;
19532 load_store_pendulum
= 0;
19537 /* The following function is called at the end of scheduling BB.
19538 After reload, it inserts nops at insn group bundling. */
19541 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
19546 fprintf (dump
, "=== Finishing schedule.\n");
19548 if (reload_completed
&& rs6000_sched_groups
)
19550 /* Do not run sched_finish hook when selective scheduling enabled. */
19551 if (sel_sched_p ())
19554 if (rs6000_sched_insert_nops
== sched_finish_none
)
19557 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
19558 n_groups
= pad_groups (dump
, sched_verbose
,
19559 current_sched_info
->prev_head
,
19560 current_sched_info
->next_tail
);
19562 n_groups
= redefine_groups (dump
, sched_verbose
,
19563 current_sched_info
->prev_head
,
19564 current_sched_info
->next_tail
);
19566 if (sched_verbose
>= 6)
19568 fprintf (dump
, "ngroups = %d\n", n_groups
);
19569 print_rtl (dump
, current_sched_info
->prev_head
);
19570 fprintf (dump
, "Done finish_sched\n");
19575 struct rs6000_sched_context
19577 short cached_can_issue_more
;
19578 rtx_insn
*last_scheduled_insn
;
19579 int load_store_pendulum
;
19584 typedef struct rs6000_sched_context rs6000_sched_context_def
;
19585 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
19587 /* Allocate store for new scheduling context. */
19589 rs6000_alloc_sched_context (void)
19591 return xmalloc (sizeof (rs6000_sched_context_def
));
19594 /* If CLEAN_P is true then initializes _SC with clean data,
19595 and from the global context otherwise. */
19597 rs6000_init_sched_context (void *_sc
, bool clean_p
)
19599 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
19603 sc
->cached_can_issue_more
= 0;
19604 sc
->last_scheduled_insn
= NULL
;
19605 sc
->load_store_pendulum
= 0;
19606 sc
->divide_cnt
= 0;
19607 sc
->vec_pairing
= 0;
19611 sc
->cached_can_issue_more
= cached_can_issue_more
;
19612 sc
->last_scheduled_insn
= last_scheduled_insn
;
19613 sc
->load_store_pendulum
= load_store_pendulum
;
19614 sc
->divide_cnt
= divide_cnt
;
19615 sc
->vec_pairing
= vec_pairing
;
19619 /* Sets the global scheduling context to the one pointed to by _SC. */
19621 rs6000_set_sched_context (void *_sc
)
19623 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
19625 gcc_assert (sc
!= NULL
);
19627 cached_can_issue_more
= sc
->cached_can_issue_more
;
19628 last_scheduled_insn
= sc
->last_scheduled_insn
;
19629 load_store_pendulum
= sc
->load_store_pendulum
;
19630 divide_cnt
= sc
->divide_cnt
;
19631 vec_pairing
= sc
->vec_pairing
;
19636 rs6000_free_sched_context (void *_sc
)
19638 gcc_assert (_sc
!= NULL
);
19644 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
19646 switch (get_attr_type (insn
))
19661 /* Length in units of the trampoline for entering a nested function. */
19664 rs6000_trampoline_size (void)
19668 switch (DEFAULT_ABI
)
19671 gcc_unreachable ();
19674 ret
= (TARGET_32BIT
) ? 12 : 24;
19678 gcc_assert (!TARGET_32BIT
);
19684 ret
= (TARGET_32BIT
) ? 40 : 48;
19691 /* Emit RTL insns to initialize the variable parts of a trampoline.
19692 FNADDR is an RTX for the address of the function's pure code.
19693 CXT is an RTX for the static chain value for the function. */
19696 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
19698 int regsize
= (TARGET_32BIT
) ? 4 : 8;
19699 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
19700 rtx ctx_reg
= force_reg (Pmode
, cxt
);
19701 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
19703 switch (DEFAULT_ABI
)
19706 gcc_unreachable ();
19708 /* Under AIX, just build the 3 word function descriptor */
19711 rtx fnmem
, fn_reg
, toc_reg
;
19713 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
19714 error ("you cannot take the address of a nested function if you use "
19715 "the %qs option", "-mno-pointers-to-nested-functions");
19717 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
19718 fn_reg
= gen_reg_rtx (Pmode
);
19719 toc_reg
= gen_reg_rtx (Pmode
);
19721 /* Macro to shorten the code expansions below. */
19722 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19724 m_tramp
= replace_equiv_address (m_tramp
, addr
);
19726 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
19727 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
19728 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
19729 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
19730 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
19736 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19740 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
19741 LCT_NORMAL
, VOIDmode
,
19743 GEN_INT (rs6000_trampoline_size ()), SImode
,
19751 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19752 identifier as an argument, so the front end shouldn't look it up. */
19755 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
19757 return is_attribute_p ("altivec", attr_id
);
19760 /* Handle the "altivec" attribute. The attribute may have
19761 arguments as follows:
19763 __attribute__((altivec(vector__)))
19764 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19765 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19767 and may appear more than once (e.g., 'vector bool char') in a
19768 given declaration. */
19771 rs6000_handle_altivec_attribute (tree
*node
,
19772 tree name ATTRIBUTE_UNUSED
,
19774 int flags ATTRIBUTE_UNUSED
,
19775 bool *no_add_attrs
)
19777 tree type
= *node
, result
= NULL_TREE
;
19781 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
19782 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
19783 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
19786 while (POINTER_TYPE_P (type
)
19787 || TREE_CODE (type
) == FUNCTION_TYPE
19788 || TREE_CODE (type
) == METHOD_TYPE
19789 || TREE_CODE (type
) == ARRAY_TYPE
)
19790 type
= TREE_TYPE (type
);
19792 mode
= TYPE_MODE (type
);
19794 /* Check for invalid AltiVec type qualifiers. */
19795 if (type
== long_double_type_node
)
19796 error ("use of %<long double%> in AltiVec types is invalid");
19797 else if (type
== boolean_type_node
)
19798 error ("use of boolean types in AltiVec types is invalid");
19799 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
19800 error ("use of %<complex%> in AltiVec types is invalid");
19801 else if (DECIMAL_FLOAT_MODE_P (mode
))
19802 error ("use of decimal floating point types in AltiVec types is invalid");
19803 else if (!TARGET_VSX
)
19805 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
19808 error ("use of %<long%> in AltiVec types is invalid for "
19809 "64-bit code without %qs", "-mvsx");
19810 else if (rs6000_warn_altivec_long
)
19811 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19814 else if (type
== long_long_unsigned_type_node
19815 || type
== long_long_integer_type_node
)
19816 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19818 else if (type
== double_type_node
)
19819 error ("use of %<double%> in AltiVec types is invalid without %qs",
19823 switch (altivec_type
)
19826 unsigned_p
= TYPE_UNSIGNED (type
);
19830 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
19833 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
19836 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
19839 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
19842 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
19844 case E_SFmode
: result
= V4SF_type_node
; break;
19845 case E_DFmode
: result
= V2DF_type_node
; break;
19846 /* If the user says 'vector int bool', we may be handed the 'bool'
19847 attribute _before_ the 'vector' attribute, and so select the
19848 proper type in the 'b' case below. */
19849 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
19850 case E_V2DImode
: case E_V2DFmode
:
19858 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
19859 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
19860 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
19861 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
19868 case E_V8HImode
: result
= pixel_V8HI_type_node
;
19874 /* Propagate qualifiers attached to the element type
19875 onto the vector type. */
19876 if (result
&& result
!= type
&& TYPE_QUALS (type
))
19877 result
= build_qualified_type (result
, TYPE_QUALS (type
));
19879 *no_add_attrs
= true; /* No need to hang on to the attribute. */
19882 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
19887 /* AltiVec defines five built-in scalar types that serve as vector
19888 elements; we must teach the compiler how to mangle them. The 128-bit
19889 floating point mangling is target-specific as well. MMA defines
19890 two built-in types to be used as opaque vector types. */
19892 static const char *
19893 rs6000_mangle_type (const_tree type
)
19895 type
= TYPE_MAIN_VARIANT (type
);
19897 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
19898 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
19899 && TREE_CODE (type
) != OPAQUE_TYPE
)
19902 if (type
== bool_char_type_node
) return "U6__boolc";
19903 if (type
== bool_short_type_node
) return "U6__bools";
19904 if (type
== pixel_type_node
) return "u7__pixel";
19905 if (type
== bool_int_type_node
) return "U6__booli";
19906 if (type
== bool_long_long_type_node
) return "U6__boolx";
19908 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
19910 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
19911 return ieee128_mangling_gcc_8_1
? "U10__float128" : "u9__ieee128";
19913 if (type
== vector_pair_type_node
)
19914 return "u13__vector_pair";
19915 if (type
== vector_quad_type_node
)
19916 return "u13__vector_quad";
19918 /* For all other types, use the default mangling. */
19922 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19923 struct attribute_spec.handler. */
19926 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
19927 tree args ATTRIBUTE_UNUSED
,
19928 int flags ATTRIBUTE_UNUSED
,
19929 bool *no_add_attrs
)
19931 if (TREE_CODE (*node
) != FUNCTION_TYPE
19932 && TREE_CODE (*node
) != FIELD_DECL
19933 && TREE_CODE (*node
) != TYPE_DECL
)
19935 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
19937 *no_add_attrs
= true;
19943 /* Set longcall attributes on all functions declared when
19944 rs6000_default_long_calls is true. */
19946 rs6000_set_default_type_attributes (tree type
)
19948 if (rs6000_default_long_calls
19949 && (TREE_CODE (type
) == FUNCTION_TYPE
19950 || TREE_CODE (type
) == METHOD_TYPE
))
19951 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
19953 TYPE_ATTRIBUTES (type
));
19956 darwin_set_default_type_attributes (type
);
19960 /* Return a reference suitable for calling a function with the
19961 longcall attribute. */
19964 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
19966 /* System V adds '.' to the internal name, so skip them. */
19967 const char *call_name
= XSTR (call_ref
, 0);
19968 if (*call_name
== '.')
19970 while (*call_name
== '.')
19973 tree node
= get_identifier (call_name
);
19974 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
19979 rtx base
= const0_rtx
;
19981 if (rs6000_pcrel_p ())
19983 rtx reg
= gen_rtx_REG (Pmode
, regno
);
19984 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
19985 gen_rtvec (3, base
, call_ref
, arg
),
19986 UNSPECV_PLT_PCREL
);
19987 emit_insn (gen_rtx_SET (reg
, u
));
19991 if (DEFAULT_ABI
== ABI_ELFv2
)
19992 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
19996 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
19999 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20000 may be used by a function global entry point. For SysV4, r11
20001 is used by __glink_PLTresolve lazy resolver entry. */
20002 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20003 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20005 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20006 gen_rtvec (3, reg
, call_ref
, arg
),
20008 emit_insn (gen_rtx_SET (reg
, hi
));
20009 emit_insn (gen_rtx_SET (reg
, lo
));
20013 return force_reg (Pmode
, call_ref
);
20016 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20017 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20020 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20021 struct attribute_spec.handler. */
20023 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20024 tree args ATTRIBUTE_UNUSED
,
20025 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20028 if (DECL_P (*node
))
20030 if (TREE_CODE (*node
) == TYPE_DECL
)
20031 type
= &TREE_TYPE (*node
);
20036 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20037 || TREE_CODE (*type
) == UNION_TYPE
)))
20039 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20040 *no_add_attrs
= true;
20043 else if ((is_attribute_p ("ms_struct", name
)
20044 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20045 || ((is_attribute_p ("gcc_struct", name
)
20046 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20048 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20050 *no_add_attrs
= true;
20057 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20059 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20060 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20061 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20064 #ifdef USING_ELFOS_H
20066 /* A get_unnamed_section callback, used for switching to toc_section. */
20069 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
20071 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20072 && TARGET_MINIMAL_TOC
)
20074 if (!toc_initialized
)
20076 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20077 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20078 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20079 fprintf (asm_out_file
, "\t.tc ");
20080 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20081 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20082 fprintf (asm_out_file
, "\n");
20084 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20085 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20086 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20087 fprintf (asm_out_file
, " = .+32768\n");
20088 toc_initialized
= 1;
20091 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20093 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20095 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20096 if (!toc_initialized
)
20098 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20099 toc_initialized
= 1;
20104 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20105 if (!toc_initialized
)
20107 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20108 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20109 fprintf (asm_out_file
, " = .+32768\n");
20110 toc_initialized
= 1;
20115 /* Implement TARGET_ASM_INIT_SECTIONS. */
20118 rs6000_elf_asm_init_sections (void)
20121 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20124 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20125 SDATA2_SECTION_ASM_OP
);
20128 /* Implement TARGET_SELECT_RTX_SECTION. */
20131 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20132 unsigned HOST_WIDE_INT align
)
20134 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20135 return toc_section
;
20137 return default_elf_select_rtx_section (mode
, x
, align
);
20140 /* For a SYMBOL_REF, set generic flags and then perform some
20141 target-specific processing.
20143 When the AIX ABI is requested on a non-AIX system, replace the
20144 function name with the real name (with a leading .) rather than the
20145 function descriptor name. This saves a lot of overriding code to
20146 read the prefixes. */
20148 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20150 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20152 default_encode_section_info (decl
, rtl
, first
);
20155 && TREE_CODE (decl
) == FUNCTION_DECL
20157 && DEFAULT_ABI
== ABI_AIX
)
20159 rtx sym_ref
= XEXP (rtl
, 0);
20160 size_t len
= strlen (XSTR (sym_ref
, 0));
20161 char *str
= XALLOCAVEC (char, len
+ 2);
20163 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20164 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
20169 compare_section_name (const char *section
, const char *templ
)
20173 len
= strlen (templ
);
20174 return (strncmp (section
, templ
, len
) == 0
20175 && (section
[len
] == 0 || section
[len
] == '.'));
20179 rs6000_elf_in_small_data_p (const_tree decl
)
20181 if (rs6000_sdata
== SDATA_NONE
)
20184 /* We want to merge strings, so we never consider them small data. */
20185 if (TREE_CODE (decl
) == STRING_CST
)
20188 /* Functions are never in the small data area. */
20189 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20192 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
20194 const char *section
= DECL_SECTION_NAME (decl
);
20195 if (compare_section_name (section
, ".sdata")
20196 || compare_section_name (section
, ".sdata2")
20197 || compare_section_name (section
, ".gnu.linkonce.s")
20198 || compare_section_name (section
, ".sbss")
20199 || compare_section_name (section
, ".sbss2")
20200 || compare_section_name (section
, ".gnu.linkonce.sb")
20201 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20202 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20207 /* If we are told not to put readonly data in sdata, then don't. */
20208 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20209 && !rs6000_readonly_in_sdata
)
20212 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20215 && size
<= g_switch_value
20216 /* If it's not public, and we're not going to reference it there,
20217 there's no need to put it in the small data section. */
20218 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20225 #endif /* USING_ELFOS_H */
20227 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20230 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
20232 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
20235 /* Do not place thread-local symbols refs in the object blocks. */
20238 rs6000_use_blocks_for_decl_p (const_tree decl
)
20240 return !DECL_THREAD_LOCAL_P (decl
);
20243 /* Return a REG that occurs in ADDR with coefficient 1.
20244 ADDR can be effectively incremented by incrementing REG.
20246 r0 is special and we must not select it as an address
20247 register by this routine since our caller will try to
20248 increment the returned register via an "la" instruction. */
20251 find_addr_reg (rtx addr
)
20253 while (GET_CODE (addr
) == PLUS
)
20255 if (REG_P (XEXP (addr
, 0))
20256 && REGNO (XEXP (addr
, 0)) != 0)
20257 addr
= XEXP (addr
, 0);
20258 else if (REG_P (XEXP (addr
, 1))
20259 && REGNO (XEXP (addr
, 1)) != 0)
20260 addr
= XEXP (addr
, 1);
20261 else if (CONSTANT_P (XEXP (addr
, 0)))
20262 addr
= XEXP (addr
, 1);
20263 else if (CONSTANT_P (XEXP (addr
, 1)))
20264 addr
= XEXP (addr
, 0);
20266 gcc_unreachable ();
20268 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
20273 rs6000_fatal_bad_address (rtx op
)
20275 fatal_insn ("bad address", op
);
20280 vec
<branch_island
, va_gc
> *branch_islands
;
20282 /* Remember to generate a branch island for far calls to the given
20286 add_compiler_branch_island (tree label_name
, tree function_name
,
20289 branch_island bi
= {function_name
, label_name
, line_number
};
20290 vec_safe_push (branch_islands
, bi
);
20293 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
20294 already there or not. */
20297 no_previous_def (tree function_name
)
20302 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20303 if (function_name
== bi
->function_name
)
20308 /* GET_PREV_LABEL gets the label name from the previous definition of
20312 get_prev_label (tree function_name
)
20317 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20318 if (function_name
== bi
->function_name
)
20319 return bi
->label_name
;
20323 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20326 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20328 unsigned int length
;
20329 char *symbol_name
, *lazy_ptr_name
;
20330 char *local_label_0
;
20331 static unsigned label
= 0;
20333 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20334 symb
= (*targetm
.strip_name_encoding
) (symb
);
20336 length
= strlen (symb
);
20337 symbol_name
= XALLOCAVEC (char, length
+ 32);
20338 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20340 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
20341 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
20345 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
20346 fprintf (file
, "\t.align 5\n");
20348 fprintf (file
, "%s:\n", stub
);
20349 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20352 local_label_0
= XALLOCAVEC (char, 16);
20353 sprintf (local_label_0
, "L%u$spb", label
);
20355 fprintf (file
, "\tmflr r0\n");
20356 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
20357 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
20358 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
20359 lazy_ptr_name
, local_label_0
);
20360 fprintf (file
, "\tmtlr r0\n");
20361 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
20362 (TARGET_64BIT
? "ldu" : "lwzu"),
20363 lazy_ptr_name
, local_label_0
);
20364 fprintf (file
, "\tmtctr r12\n");
20365 fprintf (file
, "\tbctr\n");
20367 else /* mdynamic-no-pic or mkernel. */
20369 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
20370 fprintf (file
, "\t.align 4\n");
20372 fprintf (file
, "%s:\n", stub
);
20373 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20375 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
20376 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
20377 (TARGET_64BIT
? "ldu" : "lwzu"),
20379 fprintf (file
, "\tmtctr r12\n");
20380 fprintf (file
, "\tbctr\n");
20383 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20384 fprintf (file
, "%s:\n", lazy_ptr_name
);
20385 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20386 fprintf (file
, "%sdyld_stub_binding_helper\n",
20387 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
20390 /* Legitimize PIC addresses. If the address is already
20391 position-independent, we return ORIG. Newly generated
20392 position-independent addresses go into a reg. This is REG if non
20393 zero, otherwise we allocate register(s) as necessary. */
20395 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20398 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
20403 if (reg
== NULL
&& !reload_completed
)
20404 reg
= gen_reg_rtx (Pmode
);
20406 if (GET_CODE (orig
) == CONST
)
20410 if (GET_CODE (XEXP (orig
, 0)) == PLUS
20411 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
20414 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
20416 /* Use a different reg for the intermediate value, as
20417 it will be marked UNCHANGING. */
20418 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
20419 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
20422 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
20425 if (CONST_INT_P (offset
))
20427 if (SMALL_INT (offset
))
20428 return plus_constant (Pmode
, base
, INTVAL (offset
));
20429 else if (!reload_completed
)
20430 offset
= force_reg (Pmode
, offset
);
20433 rtx mem
= force_const_mem (Pmode
, orig
);
20434 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
20437 return gen_rtx_PLUS (Pmode
, base
, offset
);
20440 /* Fall back on generic machopic code. */
20441 return machopic_legitimize_pic_address (orig
, mode
, reg
);
20444 /* Output a .machine directive for the Darwin assembler, and call
20445 the generic start_file routine. */
20448 rs6000_darwin_file_start (void)
20450 static const struct
20454 HOST_WIDE_INT if_set
;
20456 { "ppc64", "ppc64", MASK_64BIT
},
20457 { "970", "ppc970", MASK_PPC_GPOPT
| MASK_MFCRF
| MASK_POWERPC64
},
20458 { "power4", "ppc970", 0 },
20459 { "G5", "ppc970", 0 },
20460 { "7450", "ppc7450", 0 },
20461 { "7400", "ppc7400", MASK_ALTIVEC
},
20462 { "G4", "ppc7400", 0 },
20463 { "750", "ppc750", 0 },
20464 { "740", "ppc750", 0 },
20465 { "G3", "ppc750", 0 },
20466 { "604e", "ppc604e", 0 },
20467 { "604", "ppc604", 0 },
20468 { "603e", "ppc603", 0 },
20469 { "603", "ppc603", 0 },
20470 { "601", "ppc601", 0 },
20471 { NULL
, "ppc", 0 } };
20472 const char *cpu_id
= "";
20475 rs6000_file_start ();
20476 darwin_file_start ();
20478 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20480 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
20481 cpu_id
= rs6000_default_cpu
;
20483 if (global_options_set
.x_rs6000_cpu_index
)
20484 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
20486 /* Look through the mapping array. Pick the first name that either
20487 matches the argument, has a bit set in IF_SET that is also set
20488 in the target flags, or has a NULL name. */
20491 while (mapping
[i
].arg
!= NULL
20492 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
20493 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
20496 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
20499 #endif /* TARGET_MACHO */
20503 rs6000_elf_reloc_rw_mask (void)
20507 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20513 /* Record an element in the table of global constructors. SYMBOL is
20514 a SYMBOL_REF of the function to be called; PRIORITY is a number
20515 between 0 and MAX_INIT_PRIORITY.
20517 This differs from default_named_section_asm_out_constructor in
20518 that we have special handling for -mrelocatable. */
20520 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
20522 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
20524 const char *section
= ".ctors";
20527 if (priority
!= DEFAULT_INIT_PRIORITY
)
20529 sprintf (buf
, ".ctors.%.5u",
20530 /* Invert the numbering so the linker puts us in the proper
20531 order; constructors are run from right to left, and the
20532 linker sorts in increasing order. */
20533 MAX_INIT_PRIORITY
- priority
);
20537 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20538 assemble_align (POINTER_SIZE
);
20540 if (DEFAULT_ABI
== ABI_V4
20541 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20543 fputs ("\t.long (", asm_out_file
);
20544 output_addr_const (asm_out_file
, symbol
);
20545 fputs (")@fixup\n", asm_out_file
);
20548 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
20551 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
20553 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
20555 const char *section
= ".dtors";
20558 if (priority
!= DEFAULT_INIT_PRIORITY
)
20560 sprintf (buf
, ".dtors.%.5u",
20561 /* Invert the numbering so the linker puts us in the proper
20562 order; constructors are run from right to left, and the
20563 linker sorts in increasing order. */
20564 MAX_INIT_PRIORITY
- priority
);
20568 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20569 assemble_align (POINTER_SIZE
);
20571 if (DEFAULT_ABI
== ABI_V4
20572 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20574 fputs ("\t.long (", asm_out_file
);
20575 output_addr_const (asm_out_file
, symbol
);
20576 fputs (")@fixup\n", asm_out_file
);
20579 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
20583 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
20585 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
20587 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
20588 ASM_OUTPUT_LABEL (file
, name
);
20589 fputs (DOUBLE_INT_ASM_OP
, file
);
20590 rs6000_output_function_entry (file
, name
);
20591 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
20594 fputs ("\t.size\t", file
);
20595 assemble_name (file
, name
);
20596 fputs (",24\n\t.type\t.", file
);
20597 assemble_name (file
, name
);
20598 fputs (",@function\n", file
);
20599 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
20601 fputs ("\t.globl\t.", file
);
20602 assemble_name (file
, name
);
20607 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20608 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20609 rs6000_output_function_entry (file
, name
);
20610 fputs (":\n", file
);
20615 if (DEFAULT_ABI
== ABI_V4
20616 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
20617 && !TARGET_SECURE_PLT
20618 && (!constant_pool_empty_p () || crtl
->profile
)
20619 && (uses_toc
= uses_TOC ()))
20624 switch_to_other_text_partition ();
20625 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
20627 fprintf (file
, "\t.long ");
20628 assemble_name (file
, toc_label_name
);
20631 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
20632 assemble_name (file
, buf
);
20635 switch_to_other_text_partition ();
20638 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20639 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20641 if (TARGET_CMODEL
== CMODEL_LARGE
20642 && rs6000_global_entry_point_prologue_needed_p ())
20646 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
20648 fprintf (file
, "\t.quad .TOC.-");
20649 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
20650 assemble_name (file
, buf
);
20654 if (DEFAULT_ABI
== ABI_AIX
)
20656 const char *desc_name
, *orig_name
;
20658 orig_name
= (*targetm
.strip_name_encoding
) (name
);
20659 desc_name
= orig_name
;
20660 while (*desc_name
== '.')
20663 if (TREE_PUBLIC (decl
))
20664 fprintf (file
, "\t.globl %s\n", desc_name
);
20666 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20667 fprintf (file
, "%s:\n", desc_name
);
20668 fprintf (file
, "\t.long %s\n", orig_name
);
20669 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
20670 fputs ("\t.long 0\n", file
);
20671 fprintf (file
, "\t.previous\n");
20673 ASM_OUTPUT_LABEL (file
, name
);
20676 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
20678 rs6000_elf_file_end (void)
20680 #ifdef HAVE_AS_GNU_ATTRIBUTE
20681 /* ??? The value emitted depends on options active at file end.
20682 Assume anyone using #pragma or attributes that might change
20683 options knows what they are doing. */
20684 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
20685 && rs6000_passes_float
)
20689 if (TARGET_HARD_FLOAT
)
20693 if (rs6000_passes_long_double
)
20695 if (!TARGET_LONG_DOUBLE_128
)
20697 else if (TARGET_IEEEQUAD
)
20702 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
20704 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
20706 if (rs6000_passes_vector
)
20707 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
20708 (TARGET_ALTIVEC_ABI
? 2 : 1));
20709 if (rs6000_returns_struct
)
20710 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
20711 aix_struct_return
? 2 : 1);
20714 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20715 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
20716 file_end_indicate_exec_stack ();
20719 if (flag_split_stack
)
20720 file_end_indicate_split_stack ();
20724 /* We have expanded a CPU builtin, so we need to emit a reference to
20725 the special symbol that LIBC uses to declare it supports the
20726 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
20727 switch_to_section (data_section
);
20728 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
20729 fprintf (asm_out_file
, "\t%s %s\n",
20730 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
20737 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20738 #define HAVE_XCOFF_DWARF_EXTRAS 0
20741 static enum unwind_info_type
20742 rs6000_xcoff_debug_unwind_info (void)
20748 rs6000_xcoff_asm_output_anchor (rtx symbol
)
20752 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
20753 SYMBOL_REF_BLOCK_OFFSET (symbol
));
20754 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
20755 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
20756 fprintf (asm_out_file
, ",");
20757 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
20758 fprintf (asm_out_file
, "\n");
20762 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
20764 fputs (GLOBAL_ASM_OP
, stream
);
20765 RS6000_OUTPUT_BASENAME (stream
, name
);
20766 putc ('\n', stream
);
20769 /* A get_unnamed_decl callback, used for read-only sections. PTR
20770 points to the section string variable. */
20773 rs6000_xcoff_output_readonly_section_asm_op (const void *directive
)
20775 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
20776 *(const char *const *) directive
,
20777 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
20780 /* Likewise for read-write sections. */
20783 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive
)
20785 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
20786 *(const char *const *) directive
,
20787 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
20791 rs6000_xcoff_output_tls_section_asm_op (const void *directive
)
20793 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
20794 *(const char *const *) directive
,
20795 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
20798 /* A get_unnamed_section callback, used for switching to toc_section. */
20801 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
20803 if (TARGET_MINIMAL_TOC
)
20805 /* toc_section is always selected at least once from
20806 rs6000_xcoff_file_start, so this is guaranteed to
20807 always be defined once and only once in each file. */
20808 if (!toc_initialized
)
20810 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
20811 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
20812 toc_initialized
= 1;
20814 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
20815 (TARGET_32BIT
? "" : ",3"));
20818 fputs ("\t.toc\n", asm_out_file
);
20821 /* Implement TARGET_ASM_INIT_SECTIONS. */
20824 rs6000_xcoff_asm_init_sections (void)
20826 read_only_data_section
20827 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
20828 &xcoff_read_only_section_name
);
20830 private_data_section
20831 = get_unnamed_section (SECTION_WRITE
,
20832 rs6000_xcoff_output_readwrite_section_asm_op
,
20833 &xcoff_private_data_section_name
);
20835 read_only_private_data_section
20836 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
20837 &xcoff_private_rodata_section_name
);
20840 = get_unnamed_section (SECTION_TLS
,
20841 rs6000_xcoff_output_tls_section_asm_op
,
20842 &xcoff_tls_data_section_name
);
20844 tls_private_data_section
20845 = get_unnamed_section (SECTION_TLS
,
20846 rs6000_xcoff_output_tls_section_asm_op
,
20847 &xcoff_private_data_section_name
);
20850 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
20852 readonly_data_section
= read_only_data_section
;
20856 rs6000_xcoff_reloc_rw_mask (void)
20862 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
20863 tree decl ATTRIBUTE_UNUSED
)
20866 static const char * const suffix
[5] = { "PR", "RO", "RW", "TL", "XO" };
20868 if (flags
& SECTION_EXCLUDE
)
20870 else if (flags
& SECTION_DEBUG
)
20872 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
20875 else if (flags
& SECTION_CODE
)
20877 else if (flags
& SECTION_TLS
)
20879 else if (flags
& SECTION_WRITE
)
20884 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
20885 (flags
& SECTION_CODE
) ? "." : "",
20886 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
20889 #define IN_NAMED_SECTION(DECL) \
20890 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20891 && DECL_SECTION_NAME (DECL) != NULL)
20894 rs6000_xcoff_select_section (tree decl
, int reloc
,
20895 unsigned HOST_WIDE_INT align
)
20897 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20899 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
20901 resolve_unique_section (decl
, reloc
, true);
20902 if (IN_NAMED_SECTION (decl
))
20903 return get_named_section (decl
, NULL
, reloc
);
20906 if (decl_readonly_section (decl
, reloc
))
20908 if (TREE_PUBLIC (decl
))
20909 return read_only_data_section
;
20911 return read_only_private_data_section
;
20916 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
20918 if (TREE_PUBLIC (decl
))
20919 return tls_data_section
;
20920 else if (bss_initializer_p (decl
))
20922 /* Convert to COMMON to emit in BSS. */
20923 DECL_COMMON (decl
) = 1;
20924 return tls_comm_section
;
20927 return tls_private_data_section
;
20931 if (TREE_PUBLIC (decl
))
20932 return data_section
;
20934 return private_data_section
;
20939 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
20943 /* Use select_section for private data and uninitialized data with
20944 alignment <= BIGGEST_ALIGNMENT. */
20945 if (!TREE_PUBLIC (decl
)
20946 || DECL_COMMON (decl
)
20947 || (DECL_INITIAL (decl
) == NULL_TREE
20948 && DECL_ALIGN (decl
) <= BIGGEST_ALIGNMENT
)
20949 || DECL_INITIAL (decl
) == error_mark_node
20950 || (flag_zero_initialized_in_bss
20951 && initializer_zerop (DECL_INITIAL (decl
))))
20954 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
20955 name
= (*targetm
.strip_name_encoding
) (name
);
20956 set_decl_section_name (decl
, name
);
20959 /* Select section for constant in constant pool.
20961 On RS/6000, all constants are in the private read-only data area.
20962 However, if this is being placed in the TOC it must be output as a
20966 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
20967 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
20969 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20970 return toc_section
;
20972 return read_only_private_data_section
;
20975 /* Remove any trailing [DS] or the like from the symbol name. */
20977 static const char *
20978 rs6000_xcoff_strip_name_encoding (const char *name
)
20983 len
= strlen (name
);
20984 if (name
[len
- 1] == ']')
20985 return ggc_alloc_string (name
, len
- 4);
20990 /* Section attributes. AIX is always PIC. */
20992 static unsigned int
20993 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
20995 unsigned int align
;
20996 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
20998 /* Align to at least UNIT size. */
20999 if ((flags
& SECTION_CODE
) != 0 || !decl
|| !DECL_P (decl
))
21000 align
= MIN_UNITS_PER_WORD
;
21002 /* Increase alignment of large objects if not already stricter. */
21003 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
21004 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
21005 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
21007 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
21010 /* Output at beginning of assembler file.
21012 Initialize the section names for the RS/6000 at this point.
21014 Specify filename, including full path, to assembler.
21016 We want to go into the TOC section so at least one .toc will be emitted.
21017 Also, in order to output proper .bs/.es pairs, we need at least one static
21018 [RW] section emitted.
21020 Finally, declare mcount when profiling to make the assembler happy. */
21023 rs6000_xcoff_file_start (void)
21025 rs6000_gen_section_name (&xcoff_bss_section_name
,
21026 main_input_filename
, ".bss_");
21027 rs6000_gen_section_name (&xcoff_private_data_section_name
,
21028 main_input_filename
, ".rw_");
21029 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
21030 main_input_filename
, ".rop_");
21031 rs6000_gen_section_name (&xcoff_read_only_section_name
,
21032 main_input_filename
, ".ro_");
21033 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
21034 main_input_filename
, ".tls_");
21035 rs6000_gen_section_name (&xcoff_tbss_section_name
,
21036 main_input_filename
, ".tbss_[UL]");
21038 fputs ("\t.file\t", asm_out_file
);
21039 output_quoted_string (asm_out_file
, main_input_filename
);
21040 fputc ('\n', asm_out_file
);
21041 if (write_symbols
!= NO_DEBUG
)
21042 switch_to_section (private_data_section
);
21043 switch_to_section (toc_section
);
21044 switch_to_section (text_section
);
21046 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
21047 rs6000_file_start ();
21050 /* Output at end of assembler file.
21051 On the RS/6000, referencing data should automatically pull in text. */
21054 rs6000_xcoff_file_end (void)
21056 switch_to_section (text_section
);
21057 fputs ("_section_.text:\n", asm_out_file
);
21058 switch_to_section (data_section
);
21059 fputs (TARGET_32BIT
21060 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21064 struct declare_alias_data
21067 bool function_descriptor
;
21070 /* Declare alias N. A helper function for for_node_and_aliases. */
21073 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21075 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21076 /* Main symbol is output specially, because varasm machinery does part of
21077 the job for us - we do not need to declare .globl/lglobs and such. */
21078 if (!n
->alias
|| n
->weakref
)
21081 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21084 /* Prevent assemble_alias from trying to use .set pseudo operation
21085 that does not behave as expected by the middle-end. */
21086 TREE_ASM_WRITTEN (n
->decl
) = true;
21088 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21089 char *buffer
= (char *) alloca (strlen (name
) + 2);
21091 int dollar_inside
= 0;
21093 strcpy (buffer
, name
);
21094 p
= strchr (buffer
, '$');
21098 p
= strchr (p
+ 1, '$');
21100 if (TREE_PUBLIC (n
->decl
))
21102 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21104 if (dollar_inside
) {
21105 if (data
->function_descriptor
)
21106 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21107 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21109 if (data
->function_descriptor
)
21111 fputs ("\t.globl .", data
->file
);
21112 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21113 putc ('\n', data
->file
);
21115 fputs ("\t.globl ", data
->file
);
21116 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21117 putc ('\n', data
->file
);
21119 #ifdef ASM_WEAKEN_DECL
21120 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21121 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21128 if (data
->function_descriptor
)
21129 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21130 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21132 if (data
->function_descriptor
)
21134 fputs ("\t.lglobl .", data
->file
);
21135 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21136 putc ('\n', data
->file
);
21138 fputs ("\t.lglobl ", data
->file
);
21139 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21140 putc ('\n', data
->file
);
21142 if (data
->function_descriptor
)
21143 fputs (".", data
->file
);
21144 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21145 fputs (":\n", data
->file
);
21150 #ifdef HAVE_GAS_HIDDEN
21151 /* Helper function to calculate visibility of a DECL
21152 and return the value as a const string. */
21154 static const char *
21155 rs6000_xcoff_visibility (tree decl
)
21157 static const char * const visibility_types
[] = {
21158 "", ",protected", ",hidden", ",internal"
21161 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
21162 return visibility_types
[vis
];
21167 /* This macro produces the initial definition of a function name.
21168 On the RS/6000, we need to place an extra '.' in the function name and
21169 output the function descriptor.
21170 Dollar signs are converted to underscores.
21172 The csect for the function will have already been created when
21173 text_section was selected. We do have to go back to that csect, however.
21175 The third and fourth parameters to the .function pseudo-op (16 and 044)
21176 are placeholders which no longer have any use.
21178 Because AIX assembler's .set command has unexpected semantics, we output
21179 all aliases as alternative labels in front of the definition. */
21182 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21184 char *buffer
= (char *) alloca (strlen (name
) + 1);
21186 int dollar_inside
= 0;
21187 struct declare_alias_data data
= {file
, false};
21189 strcpy (buffer
, name
);
21190 p
= strchr (buffer
, '$');
21194 p
= strchr (p
+ 1, '$');
21196 if (TREE_PUBLIC (decl
))
21198 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21200 if (dollar_inside
) {
21201 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21202 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21204 fputs ("\t.globl .", file
);
21205 RS6000_OUTPUT_BASENAME (file
, buffer
);
21206 #ifdef HAVE_GAS_HIDDEN
21207 fputs (rs6000_xcoff_visibility (decl
), file
);
21214 if (dollar_inside
) {
21215 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21216 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21218 fputs ("\t.lglobl .", file
);
21219 RS6000_OUTPUT_BASENAME (file
, buffer
);
21222 fputs ("\t.csect ", file
);
21223 RS6000_OUTPUT_BASENAME (file
, buffer
);
21224 fputs (TARGET_32BIT
? "[DS]\n" : "[DS],3\n", file
);
21225 RS6000_OUTPUT_BASENAME (file
, buffer
);
21226 fputs (":\n", file
);
21227 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21229 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21230 RS6000_OUTPUT_BASENAME (file
, buffer
);
21231 fputs (", TOC[tc0], 0\n", file
);
21233 switch_to_section (function_section (decl
));
21235 RS6000_OUTPUT_BASENAME (file
, buffer
);
21236 fputs (":\n", file
);
21237 data
.function_descriptor
= true;
21238 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21240 if (!DECL_IGNORED_P (decl
))
21242 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
21243 xcoffout_declare_function (file
, decl
, buffer
);
21244 else if (write_symbols
== DWARF2_DEBUG
)
21246 name
= (*targetm
.strip_name_encoding
) (name
);
21247 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
21254 /* Output assembly language to globalize a symbol from a DECL,
21255 possibly with visibility. */
21258 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
21260 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
21261 fputs (GLOBAL_ASM_OP
, stream
);
21262 RS6000_OUTPUT_BASENAME (stream
, name
);
21263 #ifdef HAVE_GAS_HIDDEN
21264 fputs (rs6000_xcoff_visibility (decl
), stream
);
21266 putc ('\n', stream
);
21269 /* Output assembly language to define a symbol as COMMON from a DECL,
21270 possibly with visibility. */
21273 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
21274 tree decl ATTRIBUTE_UNUSED
,
21276 unsigned HOST_WIDE_INT size
,
21277 unsigned HOST_WIDE_INT align
)
21279 unsigned HOST_WIDE_INT align2
= 2;
21282 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
21286 fputs (COMMON_ASM_OP
, stream
);
21287 RS6000_OUTPUT_BASENAME (stream
, name
);
21290 "," HOST_WIDE_INT_PRINT_UNSIGNED
"," HOST_WIDE_INT_PRINT_UNSIGNED
,
21293 #ifdef HAVE_GAS_HIDDEN
21295 fputs (rs6000_xcoff_visibility (decl
), stream
);
21297 putc ('\n', stream
);
21300 /* This macro produces the initial definition of a object (variable) name.
21301 Because AIX assembler's .set command has unexpected semantics, we output
21302 all aliases as alternative labels in front of the definition. */
21305 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
21307 struct declare_alias_data data
= {file
, false};
21308 RS6000_OUTPUT_BASENAME (file
, name
);
21309 fputs (":\n", file
);
21310 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21314 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21317 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
21319 fputs (integer_asm_op (size
, FALSE
), file
);
21320 assemble_name (file
, label
);
21321 fputs ("-$", file
);
21324 /* Output a symbol offset relative to the dbase for the current object.
21325 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21328 __gcc_unwind_dbase is embedded in all executables/libraries through
21329 libgcc/config/rs6000/crtdbase.S. */
21332 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
21334 fputs (integer_asm_op (size
, FALSE
), file
);
21335 assemble_name (file
, label
);
21336 fputs("-__gcc_unwind_dbase", file
);
21341 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
21345 const char *symname
;
21347 default_encode_section_info (decl
, rtl
, first
);
21349 /* Careful not to prod global register variables. */
21352 symbol
= XEXP (rtl
, 0);
21353 if (!SYMBOL_REF_P (symbol
))
21356 flags
= SYMBOL_REF_FLAGS (symbol
);
21358 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21359 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
21361 SYMBOL_REF_FLAGS (symbol
) = flags
;
21363 /* Append mapping class to extern decls. */
21364 symname
= XSTR (symbol
, 0);
21365 if (decl
/* sync condition with assemble_external () */
21366 && DECL_P (decl
) && DECL_EXTERNAL (decl
) && TREE_PUBLIC (decl
)
21367 && ((TREE_CODE (decl
) == VAR_DECL
&& !DECL_THREAD_LOCAL_P (decl
))
21368 || TREE_CODE (decl
) == FUNCTION_DECL
)
21369 && symname
[strlen (symname
) - 1] != ']')
21371 char *newname
= (char *) alloca (strlen (symname
) + 5);
21372 strcpy (newname
, symname
);
21373 strcat (newname
, (TREE_CODE (decl
) == FUNCTION_DECL
21374 ? "[DS]" : "[UA]"));
21375 XSTR (symbol
, 0) = ggc_strdup (newname
);
21378 #endif /* HAVE_AS_TLS */
21379 #endif /* TARGET_XCOFF */
21382 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
21383 const char *name
, const char *val
)
21385 fputs ("\t.weak\t", stream
);
21386 RS6000_OUTPUT_BASENAME (stream
, name
);
21387 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21388 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21391 fputs ("[DS]", stream
);
21392 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21394 fputs (rs6000_xcoff_visibility (decl
), stream
);
21396 fputs ("\n\t.weak\t.", stream
);
21397 RS6000_OUTPUT_BASENAME (stream
, name
);
21399 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21401 fputs (rs6000_xcoff_visibility (decl
), stream
);
21403 fputc ('\n', stream
);
21406 #ifdef ASM_OUTPUT_DEF
21407 ASM_OUTPUT_DEF (stream
, name
, val
);
21409 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21410 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21412 fputs ("\t.set\t.", stream
);
21413 RS6000_OUTPUT_BASENAME (stream
, name
);
21414 fputs (",.", stream
);
21415 RS6000_OUTPUT_BASENAME (stream
, val
);
21416 fputc ('\n', stream
);
21422 /* Return true if INSN should not be copied. */
21425 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
21427 return recog_memoized (insn
) >= 0
21428 && get_attr_cannot_copy (insn
);
21431 /* Compute a (partial) cost for rtx X. Return true if the complete
21432 cost has been computed, and false if subexpressions should be
21433 scanned. In either case, *TOTAL contains the cost result. */
21436 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
21437 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
21439 int code
= GET_CODE (x
);
21443 /* On the RS/6000, if it is valid in the insn, it is free. */
21445 if (((outer_code
== SET
21446 || outer_code
== PLUS
21447 || outer_code
== MINUS
)
21448 && (satisfies_constraint_I (x
)
21449 || satisfies_constraint_L (x
)))
21450 || (outer_code
== AND
21451 && (satisfies_constraint_K (x
)
21453 ? satisfies_constraint_L (x
)
21454 : satisfies_constraint_J (x
))))
21455 || ((outer_code
== IOR
|| outer_code
== XOR
)
21456 && (satisfies_constraint_K (x
)
21458 ? satisfies_constraint_L (x
)
21459 : satisfies_constraint_J (x
))))
21460 || outer_code
== ASHIFT
21461 || outer_code
== ASHIFTRT
21462 || outer_code
== LSHIFTRT
21463 || outer_code
== ROTATE
21464 || outer_code
== ROTATERT
21465 || outer_code
== ZERO_EXTRACT
21466 || (outer_code
== MULT
21467 && satisfies_constraint_I (x
))
21468 || ((outer_code
== DIV
|| outer_code
== UDIV
21469 || outer_code
== MOD
|| outer_code
== UMOD
)
21470 && exact_log2 (INTVAL (x
)) >= 0)
21471 || (outer_code
== COMPARE
21472 && (satisfies_constraint_I (x
)
21473 || satisfies_constraint_K (x
)))
21474 || ((outer_code
== EQ
|| outer_code
== NE
)
21475 && (satisfies_constraint_I (x
)
21476 || satisfies_constraint_K (x
)
21478 ? satisfies_constraint_L (x
)
21479 : satisfies_constraint_J (x
))))
21480 || (outer_code
== GTU
21481 && satisfies_constraint_I (x
))
21482 || (outer_code
== LTU
21483 && satisfies_constraint_P (x
)))
21488 else if ((outer_code
== PLUS
21489 && reg_or_add_cint_operand (x
, mode
))
21490 || (outer_code
== MINUS
21491 && reg_or_sub_cint_operand (x
, mode
))
21492 || ((outer_code
== SET
21493 || outer_code
== IOR
21494 || outer_code
== XOR
)
21496 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
21498 *total
= COSTS_N_INSNS (1);
21504 case CONST_WIDE_INT
:
21508 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21512 /* When optimizing for size, MEM should be slightly more expensive
21513 than generating address, e.g., (plus (reg) (const)).
21514 L1 cache latency is about two instructions. */
21515 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21516 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
21517 *total
+= COSTS_N_INSNS (100);
21526 if (FLOAT_MODE_P (mode
))
21527 *total
= rs6000_cost
->fp
;
21529 *total
= COSTS_N_INSNS (1);
21533 if (CONST_INT_P (XEXP (x
, 1))
21534 && satisfies_constraint_I (XEXP (x
, 1)))
21536 if (INTVAL (XEXP (x
, 1)) >= -256
21537 && INTVAL (XEXP (x
, 1)) <= 255)
21538 *total
= rs6000_cost
->mulsi_const9
;
21540 *total
= rs6000_cost
->mulsi_const
;
21542 else if (mode
== SFmode
)
21543 *total
= rs6000_cost
->fp
;
21544 else if (FLOAT_MODE_P (mode
))
21545 *total
= rs6000_cost
->dmul
;
21546 else if (mode
== DImode
)
21547 *total
= rs6000_cost
->muldi
;
21549 *total
= rs6000_cost
->mulsi
;
21553 if (mode
== SFmode
)
21554 *total
= rs6000_cost
->fp
;
21556 *total
= rs6000_cost
->dmul
;
21561 if (FLOAT_MODE_P (mode
))
21563 *total
= mode
== DFmode
? rs6000_cost
->ddiv
21564 : rs6000_cost
->sdiv
;
21571 if (CONST_INT_P (XEXP (x
, 1))
21572 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
21574 if (code
== DIV
|| code
== MOD
)
21576 *total
= COSTS_N_INSNS (2);
21579 *total
= COSTS_N_INSNS (1);
21583 if (GET_MODE (XEXP (x
, 1)) == DImode
)
21584 *total
= rs6000_cost
->divdi
;
21586 *total
= rs6000_cost
->divsi
;
21588 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21589 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
21590 *total
+= COSTS_N_INSNS (2);
21594 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
21598 *total
= COSTS_N_INSNS (4);
21602 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
21606 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
21610 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
21613 *total
= COSTS_N_INSNS (1);
21617 if (CONST_INT_P (XEXP (x
, 1)))
21619 rtx left
= XEXP (x
, 0);
21620 rtx_code left_code
= GET_CODE (left
);
21622 /* rotate-and-mask: 1 insn. */
21623 if ((left_code
== ROTATE
21624 || left_code
== ASHIFT
21625 || left_code
== LSHIFTRT
)
21626 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
21628 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
21629 if (!CONST_INT_P (XEXP (left
, 1)))
21630 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
21631 *total
+= COSTS_N_INSNS (1);
21635 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21636 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
21637 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
21638 || (val
& 0xffff) == val
21639 || (val
& 0xffff0000) == val
21640 || ((val
& 0xffff) == 0 && mode
== SImode
))
21642 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
21643 *total
+= COSTS_N_INSNS (1);
21648 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
21650 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
21651 *total
+= COSTS_N_INSNS (2);
21656 *total
= COSTS_N_INSNS (1);
21661 *total
= COSTS_N_INSNS (1);
21667 *total
= COSTS_N_INSNS (1);
21671 /* The EXTSWSLI instruction is a combined instruction. Don't count both
21672 the sign extend and shift separately within the insn. */
21673 if (TARGET_EXTSWSLI
&& mode
== DImode
21674 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
21675 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
21686 /* Handle mul_highpart. */
21687 if (outer_code
== TRUNCATE
21688 && GET_CODE (XEXP (x
, 0)) == MULT
)
21690 if (mode
== DImode
)
21691 *total
= rs6000_cost
->muldi
;
21693 *total
= rs6000_cost
->mulsi
;
21696 else if (outer_code
== AND
)
21699 *total
= COSTS_N_INSNS (1);
21704 if (MEM_P (XEXP (x
, 0)))
21707 *total
= COSTS_N_INSNS (1);
21713 if (!FLOAT_MODE_P (mode
))
21715 *total
= COSTS_N_INSNS (1);
21721 case UNSIGNED_FLOAT
:
21724 case FLOAT_TRUNCATE
:
21725 *total
= rs6000_cost
->fp
;
21729 if (mode
== DFmode
)
21730 *total
= rs6000_cost
->sfdf_convert
;
21732 *total
= rs6000_cost
->fp
;
21739 *total
= COSTS_N_INSNS (1);
21742 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
21744 *total
= rs6000_cost
->fp
;
21753 /* Carry bit requires mode == Pmode.
21754 NEG or PLUS already counted so only add one. */
21756 && (outer_code
== NEG
|| outer_code
== PLUS
))
21758 *total
= COSTS_N_INSNS (1);
21766 if (outer_code
== SET
)
21768 if (XEXP (x
, 1) == const0_rtx
)
21770 *total
= COSTS_N_INSNS (2);
21775 *total
= COSTS_N_INSNS (3);
21780 if (outer_code
== COMPARE
)
21788 if (XINT (x
, 1) == UNSPEC_MMA_XXSETACCZ
)
21802 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
21805 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
21806 int opno
, int *total
, bool speed
)
21808 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
21811 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21812 "opno = %d, total = %d, speed = %s, x:\n",
21813 ret
? "complete" : "scan inner",
21814 GET_MODE_NAME (mode
),
21815 GET_RTX_NAME (outer_code
),
21818 speed
? "true" : "false");
21826 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
21828 if (recog_memoized (insn
) < 0)
21831 /* If we are optimizing for size, just use the length. */
21833 return get_attr_length (insn
);
21835 /* Use the cost if provided. */
21836 int cost
= get_attr_cost (insn
);
21840 /* If the insn tells us how many insns there are, use that. Otherwise use
21841 the length/4. Adjust the insn length to remove the extra size that
21842 prefixed instructions take. */
21843 int n
= get_attr_num_insns (insn
);
21846 int length
= get_attr_length (insn
);
21847 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
21850 ADJUST_INSN_LENGTH (insn
, adjust
);
21857 enum attr_type type
= get_attr_type (insn
);
21864 cost
= COSTS_N_INSNS (n
+ 1);
21868 switch (get_attr_size (insn
))
21871 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
21874 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
21877 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
21880 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
21883 gcc_unreachable ();
21887 switch (get_attr_size (insn
))
21890 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
21893 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
21896 gcc_unreachable ();
21901 cost
= n
* rs6000_cost
->fp
;
21904 cost
= n
* rs6000_cost
->dmul
;
21907 cost
= n
* rs6000_cost
->sdiv
;
21910 cost
= n
* rs6000_cost
->ddiv
;
21917 cost
= COSTS_N_INSNS (n
+ 2);
21921 cost
= COSTS_N_INSNS (n
);
21927 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21930 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
21931 addr_space_t as
, bool speed
)
21933 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
21935 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21936 ret
, speed
? "true" : "false");
21943 /* A C expression returning the cost of moving data from a register of class
21944 CLASS1 to one of CLASS2. */
21947 rs6000_register_move_cost (machine_mode mode
,
21948 reg_class_t from
, reg_class_t to
)
21951 reg_class_t rclass
;
21953 if (TARGET_DEBUG_COST
)
21956 /* If we have VSX, we can easily move between FPR or Altivec registers,
21957 otherwise we can only easily move within classes.
21958 Do this first so we give best-case answers for union classes
21959 containing both gprs and vsx regs. */
21960 HARD_REG_SET to_vsx
, from_vsx
;
21961 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
21962 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
21963 if (!hard_reg_set_empty_p (to_vsx
)
21964 && !hard_reg_set_empty_p (from_vsx
)
21966 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
21968 int reg
= FIRST_FPR_REGNO
;
21970 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
21971 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
21972 reg
= FIRST_ALTIVEC_REGNO
;
21973 ret
= 2 * hard_regno_nregs (reg
, mode
);
21976 /* Moves from/to GENERAL_REGS. */
21977 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
21978 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
21980 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
21982 if (TARGET_DIRECT_MOVE
)
21984 /* Keep the cost for direct moves above that for within
21985 a register class even if the actual processor cost is
21986 comparable. We do this because a direct move insn
21987 can't be a nop, whereas with ideal register
21988 allocation a move within the same class might turn
21989 out to be a nop. */
21990 if (rs6000_tune
== PROCESSOR_POWER9
21991 || rs6000_tune
== PROCESSOR_POWER10
)
21992 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
21994 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
21995 /* SFmode requires a conversion when moving between gprs
21997 if (mode
== SFmode
)
22001 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22002 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22005 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22007 else if (rclass
== CR_REGS
)
22010 /* For those processors that have slow LR/CTR moves, make them more
22011 expensive than memory in order to bias spills to memory .*/
22012 else if ((rs6000_tune
== PROCESSOR_POWER6
22013 || rs6000_tune
== PROCESSOR_POWER7
22014 || rs6000_tune
== PROCESSOR_POWER8
22015 || rs6000_tune
== PROCESSOR_POWER9
)
22016 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22017 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22020 /* A move will cost one instruction per GPR moved. */
22021 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22024 /* Everything else has to go through GENERAL_REGS. */
22026 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22027 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22029 if (TARGET_DEBUG_COST
)
22031 if (dbg_cost_ctrl
== 1)
22033 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22034 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22035 reg_class_names
[to
]);
22042 /* A C expressions returning the cost of moving data of MODE from a register to
22046 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22047 bool in ATTRIBUTE_UNUSED
)
22051 if (TARGET_DEBUG_COST
)
22054 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22055 ret
= 4 * hard_regno_nregs (0, mode
);
22056 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22057 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22058 ret
= 4 * hard_regno_nregs (32, mode
);
22059 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22060 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22062 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22064 if (TARGET_DEBUG_COST
)
22066 if (dbg_cost_ctrl
== 1)
22068 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22069 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22076 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22078 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22079 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22080 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22081 move cost between GENERAL_REGS and VSX_REGS low.
22083 It might seem reasonable to use a union class. After all, if usage
22084 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22085 rather than memory. However, in cases where register pressure of
22086 both is high, like the cactus_adm spec test, allowing
22087 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22088 the first scheduling pass. This is partly due to an allocno of
22089 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22090 class, which gives too high a pressure for GENERAL_REGS and too low
22091 for VSX_REGS. So, force a choice of the subclass here.
22093 The best class is also the union if GENERAL_REGS and VSX_REGS have
22094 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22095 allocno class, since trying to narrow down the class by regno mode
22096 is prone to error. For example, SImode is allowed in VSX regs and
22097 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22098 it would be wrong to choose an allocno of GENERAL_REGS based on
22102 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22103 reg_class_t allocno_class
,
22104 reg_class_t best_class
)
22106 switch (allocno_class
)
22108 case GEN_OR_VSX_REGS
:
22109 /* best_class must be a subset of allocno_class. */
22110 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22111 || best_class
== GEN_OR_FLOAT_REGS
22112 || best_class
== VSX_REGS
22113 || best_class
== ALTIVEC_REGS
22114 || best_class
== FLOAT_REGS
22115 || best_class
== GENERAL_REGS
22116 || best_class
== BASE_REGS
);
22117 /* Use best_class but choose wider classes when copying from the
22118 wider class to best_class is cheap. This mimics IRA choice
22119 of allocno class. */
22120 if (best_class
== BASE_REGS
)
22121 return GENERAL_REGS
;
22123 && (best_class
== FLOAT_REGS
|| best_class
== ALTIVEC_REGS
))
22131 return allocno_class
;
22134 /* Returns a code for a target-specific builtin that implements
22135 reciprocal of the function, or NULL_TREE if not available. */
22138 rs6000_builtin_reciprocal (tree fndecl
)
22140 switch (DECL_MD_FUNCTION_CODE (fndecl
))
22142 case VSX_BUILTIN_XVSQRTDP
:
22143 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode
))
22146 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
22148 case VSX_BUILTIN_XVSQRTSP
:
22149 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode
))
22152 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_4SF
];
22159 /* Load up a constant. If the mode is a vector mode, splat the value across
22160 all of the vector elements. */
22163 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
22167 if (mode
== SFmode
|| mode
== DFmode
)
22169 rtx d
= const_double_from_real_value (dconst
, mode
);
22170 reg
= force_reg (mode
, d
);
22172 else if (mode
== V4SFmode
)
22174 rtx d
= const_double_from_real_value (dconst
, SFmode
);
22175 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
22176 reg
= gen_reg_rtx (mode
);
22177 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22179 else if (mode
== V2DFmode
)
22181 rtx d
= const_double_from_real_value (dconst
, DFmode
);
22182 rtvec v
= gen_rtvec (2, d
, d
);
22183 reg
= gen_reg_rtx (mode
);
22184 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22187 gcc_unreachable ();
22192 /* Generate an FMA instruction. */
22195 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
22197 machine_mode mode
= GET_MODE (target
);
22200 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
22201 gcc_assert (dst
!= NULL
);
22204 emit_move_insn (target
, dst
);
22207 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22210 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
22212 machine_mode mode
= GET_MODE (dst
);
22215 /* This is a tad more complicated, since the fnma_optab is for
22216 a different expression: fma(-m1, m2, a), which is the same
22217 thing except in the case of signed zeros.
22219 Fortunately we know that if FMA is supported that FNMSUB is
22220 also supported in the ISA. Just expand it directly. */
22222 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
22224 r
= gen_rtx_NEG (mode
, a
);
22225 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
22226 r
= gen_rtx_NEG (mode
, r
);
22227 emit_insn (gen_rtx_SET (dst
, r
));
22230 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22231 add a reg_note saying that this was a division. Support both scalar and
22232 vector divide. Assumes no trapping math and finite arguments. */
22235 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
22237 machine_mode mode
= GET_MODE (dst
);
22238 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
22241 /* Low precision estimates guarantee 5 bits of accuracy. High
22242 precision estimates guarantee 14 bits of accuracy. SFmode
22243 requires 23 bits of accuracy. DFmode requires 52 bits of
22244 accuracy. Each pass at least doubles the accuracy, leading
22245 to the following. */
22246 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22247 if (mode
== DFmode
|| mode
== V2DFmode
)
22250 enum insn_code code
= optab_handler (smul_optab
, mode
);
22251 insn_gen_fn gen_mul
= GEN_FCN (code
);
22253 gcc_assert (code
!= CODE_FOR_nothing
);
22255 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
22257 /* x0 = 1./d estimate */
22258 x0
= gen_reg_rtx (mode
);
22259 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
22262 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22265 /* e0 = 1. - d * x0 */
22266 e0
= gen_reg_rtx (mode
);
22267 rs6000_emit_nmsub (e0
, d
, x0
, one
);
22269 /* x1 = x0 + e0 * x0 */
22270 x1
= gen_reg_rtx (mode
);
22271 rs6000_emit_madd (x1
, e0
, x0
, x0
);
22273 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
22274 ++i
, xprev
= xnext
, eprev
= enext
) {
22276 /* enext = eprev * eprev */
22277 enext
= gen_reg_rtx (mode
);
22278 emit_insn (gen_mul (enext
, eprev
, eprev
));
22280 /* xnext = xprev + enext * xprev */
22281 xnext
= gen_reg_rtx (mode
);
22282 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
22288 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22290 /* u = n * xprev */
22291 u
= gen_reg_rtx (mode
);
22292 emit_insn (gen_mul (u
, n
, xprev
));
22294 /* v = n - (d * u) */
22295 v
= gen_reg_rtx (mode
);
22296 rs6000_emit_nmsub (v
, d
, u
, n
);
22298 /* dst = (v * xprev) + u */
22299 rs6000_emit_madd (dst
, v
, xprev
, u
);
22302 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
22305 /* Goldschmidt's Algorithm for single/double-precision floating point
22306 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22309 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
22311 machine_mode mode
= GET_MODE (src
);
22312 rtx e
= gen_reg_rtx (mode
);
22313 rtx g
= gen_reg_rtx (mode
);
22314 rtx h
= gen_reg_rtx (mode
);
22316 /* Low precision estimates guarantee 5 bits of accuracy. High
22317 precision estimates guarantee 14 bits of accuracy. SFmode
22318 requires 23 bits of accuracy. DFmode requires 52 bits of
22319 accuracy. Each pass at least doubles the accuracy, leading
22320 to the following. */
22321 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22322 if (mode
== DFmode
|| mode
== V2DFmode
)
22327 enum insn_code code
= optab_handler (smul_optab
, mode
);
22328 insn_gen_fn gen_mul
= GEN_FCN (code
);
22330 gcc_assert (code
!= CODE_FOR_nothing
);
22332 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
22334 /* e = rsqrt estimate */
22335 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
22338 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22341 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
22343 if (mode
== SFmode
)
22345 rtx target
= emit_conditional_move (e
, GT
, src
, zero
, mode
,
22348 emit_move_insn (e
, target
);
22352 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
22353 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
22357 /* g = sqrt estimate. */
22358 emit_insn (gen_mul (g
, e
, src
));
22359 /* h = 1/(2*sqrt) estimate. */
22360 emit_insn (gen_mul (h
, e
, mhalf
));
22366 rtx t
= gen_reg_rtx (mode
);
22367 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22368 /* Apply correction directly to 1/rsqrt estimate. */
22369 rs6000_emit_madd (dst
, e
, t
, e
);
22373 for (i
= 0; i
< passes
; i
++)
22375 rtx t1
= gen_reg_rtx (mode
);
22376 rtx g1
= gen_reg_rtx (mode
);
22377 rtx h1
= gen_reg_rtx (mode
);
22379 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
22380 rs6000_emit_madd (g1
, g
, t1
, g
);
22381 rs6000_emit_madd (h1
, h
, t1
, h
);
22386 /* Multiply by 2 for 1/rsqrt. */
22387 emit_insn (gen_add3_insn (dst
, h
, h
));
22392 rtx t
= gen_reg_rtx (mode
);
22393 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22394 rs6000_emit_madd (dst
, g
, t
, g
);
22400 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22401 (Power7) targets. DST is the target, and SRC is the argument operand. */
22404 rs6000_emit_popcount (rtx dst
, rtx src
)
22406 machine_mode mode
= GET_MODE (dst
);
22409 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22410 if (TARGET_POPCNTD
)
22412 if (mode
== SImode
)
22413 emit_insn (gen_popcntdsi2 (dst
, src
));
22415 emit_insn (gen_popcntddi2 (dst
, src
));
22419 tmp1
= gen_reg_rtx (mode
);
22421 if (mode
== SImode
)
22423 emit_insn (gen_popcntbsi2 (tmp1
, src
));
22424 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
22426 tmp2
= force_reg (SImode
, tmp2
);
22427 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
22431 emit_insn (gen_popcntbdi2 (tmp1
, src
));
22432 tmp2
= expand_mult (DImode
, tmp1
,
22433 GEN_INT ((HOST_WIDE_INT
)
22434 0x01010101 << 32 | 0x01010101),
22436 tmp2
= force_reg (DImode
, tmp2
);
22437 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
22442 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22443 target, and SRC is the argument operand. */
22446 rs6000_emit_parity (rtx dst
, rtx src
)
22448 machine_mode mode
= GET_MODE (dst
);
22451 tmp
= gen_reg_rtx (mode
);
22453 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22456 if (mode
== SImode
)
22458 emit_insn (gen_popcntbsi2 (tmp
, src
));
22459 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
22463 emit_insn (gen_popcntbdi2 (tmp
, src
));
22464 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
22469 if (mode
== SImode
)
22471 /* Is mult+shift >= shift+xor+shift+xor? */
22472 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
22474 rtx tmp1
, tmp2
, tmp3
, tmp4
;
22476 tmp1
= gen_reg_rtx (SImode
);
22477 emit_insn (gen_popcntbsi2 (tmp1
, src
));
22479 tmp2
= gen_reg_rtx (SImode
);
22480 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
22481 tmp3
= gen_reg_rtx (SImode
);
22482 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
22484 tmp4
= gen_reg_rtx (SImode
);
22485 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
22486 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
22489 rs6000_emit_popcount (tmp
, src
);
22490 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
22494 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22495 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
22497 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
22499 tmp1
= gen_reg_rtx (DImode
);
22500 emit_insn (gen_popcntbdi2 (tmp1
, src
));
22502 tmp2
= gen_reg_rtx (DImode
);
22503 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
22504 tmp3
= gen_reg_rtx (DImode
);
22505 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
22507 tmp4
= gen_reg_rtx (DImode
);
22508 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
22509 tmp5
= gen_reg_rtx (DImode
);
22510 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
22512 tmp6
= gen_reg_rtx (DImode
);
22513 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
22514 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
22517 rs6000_emit_popcount (tmp
, src
);
22518 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
22522 /* Expand an Altivec constant permutation for little endian mode.
22523 OP0 and OP1 are the input vectors and TARGET is the output vector.
22524 SEL specifies the constant permutation vector.
22526 There are two issues: First, the two input operands must be
22527 swapped so that together they form a double-wide array in LE
22528 order. Second, the vperm instruction has surprising behavior
22529 in LE mode: it interprets the elements of the source vectors
22530 in BE mode ("left to right") and interprets the elements of
22531 the destination vector in LE mode ("right to left"). To
22532 correct for this, we must subtract each element of the permute
22533 control vector from 31.
22535 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22536 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22537 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22538 serve as the permute control vector. Then, in BE mode,
22542 places the desired result in vr9. However, in LE mode the
22543 vector contents will be
22545 vr10 = 00000003 00000002 00000001 00000000
22546 vr11 = 00000007 00000006 00000005 00000004
22548 The result of the vperm using the same permute control vector is
22550 vr9 = 05000000 07000000 01000000 03000000
22552 That is, the leftmost 4 bytes of vr10 are interpreted as the
22553 source for the rightmost 4 bytes of vr9, and so on.
22555 If we change the permute control vector to
22557 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22565 vr9 = 00000006 00000004 00000002 00000000. */
22568 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
22569 const vec_perm_indices
&sel
)
22573 rtx constv
, unspec
;
22575 /* Unpack and adjust the constant selector. */
22576 for (i
= 0; i
< 16; ++i
)
22578 unsigned int elt
= 31 - (sel
[i
] & 31);
22579 perm
[i
] = GEN_INT (elt
);
22582 /* Expand to a permute, swapping the inputs and using the
22583 adjusted selector. */
22585 op0
= force_reg (V16QImode
, op0
);
22587 op1
= force_reg (V16QImode
, op1
);
22589 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
22590 constv
= force_reg (V16QImode
, constv
);
22591 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
22593 if (!REG_P (target
))
22595 rtx tmp
= gen_reg_rtx (V16QImode
);
22596 emit_move_insn (tmp
, unspec
);
22600 emit_move_insn (target
, unspec
);
22603 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22604 permute control vector. But here it's not a constant, so we must
22605 generate a vector NAND or NOR to do the adjustment. */
22608 altivec_expand_vec_perm_le (rtx operands
[4])
22610 rtx notx
, iorx
, unspec
;
22611 rtx target
= operands
[0];
22612 rtx op0
= operands
[1];
22613 rtx op1
= operands
[2];
22614 rtx sel
= operands
[3];
22616 rtx norreg
= gen_reg_rtx (V16QImode
);
22617 machine_mode mode
= GET_MODE (target
);
22619 /* Get everything in regs so the pattern matches. */
22621 op0
= force_reg (mode
, op0
);
22623 op1
= force_reg (mode
, op1
);
22625 sel
= force_reg (V16QImode
, sel
);
22626 if (!REG_P (target
))
22627 tmp
= gen_reg_rtx (mode
);
22629 if (TARGET_P9_VECTOR
)
22631 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
22636 /* Invert the selector with a VNAND if available, else a VNOR.
22637 The VNAND is preferred for future fusion opportunities. */
22638 notx
= gen_rtx_NOT (V16QImode
, sel
);
22639 iorx
= (TARGET_P8_VECTOR
22640 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
22641 : gen_rtx_AND (V16QImode
, notx
, notx
));
22642 emit_insn (gen_rtx_SET (norreg
, iorx
));
22644 /* Permute with operands reversed and adjusted selector. */
22645 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
22649 /* Copy into target, possibly by way of a register. */
22650 if (!REG_P (target
))
22652 emit_move_insn (tmp
, unspec
);
22656 emit_move_insn (target
, unspec
);
22659 /* Expand an Altivec constant permutation. Return true if we match
22660 an efficient implementation; false to fall back to VPERM.
22662 OP0 and OP1 are the input vectors and TARGET is the output vector.
22663 SEL specifies the constant permutation vector. */
22666 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
22667 const vec_perm_indices
&sel
)
22669 struct altivec_perm_insn
{
22670 HOST_WIDE_INT mask
;
22671 enum insn_code impl
;
22672 unsigned char perm
[16];
22674 static const struct altivec_perm_insn patterns
[] = {
22675 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuhum_direct
,
22676 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
22677 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuwum_direct
,
22678 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22679 { OPTION_MASK_ALTIVEC
,
22680 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
22681 : CODE_FOR_altivec_vmrglb_direct
),
22682 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22683 { OPTION_MASK_ALTIVEC
,
22684 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
22685 : CODE_FOR_altivec_vmrglh_direct
),
22686 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22687 { OPTION_MASK_ALTIVEC
,
22688 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct
22689 : CODE_FOR_altivec_vmrglw_direct
),
22690 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22691 { OPTION_MASK_ALTIVEC
,
22692 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
22693 : CODE_FOR_altivec_vmrghb_direct
),
22694 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22695 { OPTION_MASK_ALTIVEC
,
22696 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
22697 : CODE_FOR_altivec_vmrghh_direct
),
22698 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22699 { OPTION_MASK_ALTIVEC
,
22700 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct
22701 : CODE_FOR_altivec_vmrghw_direct
),
22702 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22703 { OPTION_MASK_P8_VECTOR
,
22704 (BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
22705 : CODE_FOR_p8_vmrgow_v4sf_direct
),
22706 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22707 { OPTION_MASK_P8_VECTOR
,
22708 (BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
22709 : CODE_FOR_p8_vmrgew_v4sf_direct
),
22710 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22713 unsigned int i
, j
, elt
, which
;
22714 unsigned char perm
[16];
22718 /* Unpack the constant selector. */
22719 for (i
= which
= 0; i
< 16; ++i
)
22722 which
|= (elt
< 16 ? 1 : 2);
22726 /* Simplify the constant selector based on operands. */
22730 gcc_unreachable ();
22734 if (!rtx_equal_p (op0
, op1
))
22739 for (i
= 0; i
< 16; ++i
)
22751 /* Look for splat patterns. */
22756 for (i
= 0; i
< 16; ++i
)
22757 if (perm
[i
] != elt
)
22761 if (!BYTES_BIG_ENDIAN
)
22763 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
22769 for (i
= 0; i
< 16; i
+= 2)
22770 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
22774 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
22775 x
= gen_reg_rtx (V8HImode
);
22776 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
22778 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
22785 for (i
= 0; i
< 16; i
+= 4)
22787 || perm
[i
+ 1] != elt
+ 1
22788 || perm
[i
+ 2] != elt
+ 2
22789 || perm
[i
+ 3] != elt
+ 3)
22793 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
22794 x
= gen_reg_rtx (V4SImode
);
22795 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
22797 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
22803 /* Look for merge and pack patterns. */
22804 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
22808 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
22811 elt
= patterns
[j
].perm
[0];
22812 if (perm
[0] == elt
)
22814 else if (perm
[0] == elt
+ 16)
22818 for (i
= 1; i
< 16; ++i
)
22820 elt
= patterns
[j
].perm
[i
];
22822 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
22823 else if (one_vec
&& elt
>= 16)
22825 if (perm
[i
] != elt
)
22830 enum insn_code icode
= patterns
[j
].impl
;
22831 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
22832 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
22834 /* For little-endian, don't use vpkuwum and vpkuhum if the
22835 underlying vector type is not V4SI and V8HI, respectively.
22836 For example, using vpkuwum with a V8HI picks up the even
22837 halfwords (BE numbering) when the even halfwords (LE
22838 numbering) are what we need. */
22839 if (!BYTES_BIG_ENDIAN
22840 && icode
== CODE_FOR_altivec_vpkuwum_direct
22842 && GET_MODE (op0
) != V4SImode
)
22844 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
22846 if (!BYTES_BIG_ENDIAN
22847 && icode
== CODE_FOR_altivec_vpkuhum_direct
22849 && GET_MODE (op0
) != V8HImode
)
22851 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
22854 /* For little-endian, the two input operands must be swapped
22855 (or swapped back) to ensure proper right-to-left numbering
22857 if (swapped
^ !BYTES_BIG_ENDIAN
)
22858 std::swap (op0
, op1
);
22859 if (imode
!= V16QImode
)
22861 op0
= gen_lowpart (imode
, op0
);
22862 op1
= gen_lowpart (imode
, op1
);
22864 if (omode
== V16QImode
)
22867 x
= gen_reg_rtx (omode
);
22868 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
22869 if (omode
!= V16QImode
)
22870 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
22875 if (!BYTES_BIG_ENDIAN
)
22877 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
22884 /* Expand a VSX Permute Doubleword constant permutation.
22885 Return true if we match an efficient implementation. */
22888 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
22889 unsigned char perm0
, unsigned char perm1
)
22893 /* If both selectors come from the same operand, fold to single op. */
22894 if ((perm0
& 2) == (perm1
& 2))
22901 /* If both operands are equal, fold to simpler permutation. */
22902 if (rtx_equal_p (op0
, op1
))
22905 perm1
= (perm1
& 1) + 2;
22907 /* If the first selector comes from the second operand, swap. */
22908 else if (perm0
& 2)
22914 std::swap (op0
, op1
);
22916 /* If the second selector does not come from the second operand, fail. */
22917 else if ((perm1
& 2) == 0)
22921 if (target
!= NULL
)
22923 machine_mode vmode
, dmode
;
22926 vmode
= GET_MODE (target
);
22927 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
22928 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
22929 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
22930 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
22931 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
22932 emit_insn (gen_rtx_SET (target
, x
));
22937 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22940 rs6000_vectorize_vec_perm_const (machine_mode vmode
, rtx target
, rtx op0
,
22941 rtx op1
, const vec_perm_indices
&sel
)
22943 bool testing_p
= !target
;
22945 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22946 if (TARGET_ALTIVEC
&& testing_p
)
22951 rtx nop0
= force_reg (vmode
, op0
);
22957 op1
= force_reg (vmode
, op1
);
22959 /* Check for ps_merge* or xxpermdi insns. */
22960 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
22964 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
22965 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
22967 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
22971 if (TARGET_ALTIVEC
)
22973 /* Force the target-independent code to lower to V16QImode. */
22974 if (vmode
!= V16QImode
)
22976 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
22983 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22984 OP0 and OP1 are the input vectors and TARGET is the output vector.
22985 PERM specifies the constant permutation vector. */
22988 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
22989 machine_mode vmode
, const vec_perm_builder
&perm
)
22991 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
22993 emit_move_insn (target
, x
);
22996 /* Expand an extract even operation. */
22999 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
23001 machine_mode vmode
= GET_MODE (target
);
23002 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
23003 vec_perm_builder
perm (nelt
, nelt
, 1);
23005 for (i
= 0; i
< nelt
; i
++)
23006 perm
.quick_push (i
* 2);
23008 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23011 /* Expand a vector interleave operation. */
23014 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
23016 machine_mode vmode
= GET_MODE (target
);
23017 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
23018 vec_perm_builder
perm (nelt
, nelt
, 1);
23020 high
= (highp
? 0 : nelt
/ 2);
23021 for (i
= 0; i
< nelt
/ 2; i
++)
23023 perm
.quick_push (i
+ high
);
23024 perm
.quick_push (i
+ nelt
+ high
);
23027 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23030 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23032 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
23034 HOST_WIDE_INT
hwi_scale (scale
);
23035 REAL_VALUE_TYPE r_pow
;
23036 rtvec v
= rtvec_alloc (2);
23038 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
23039 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
23040 elt
= const_double_from_real_value (r_pow
, DFmode
);
23041 RTVEC_ELT (v
, 0) = elt
;
23042 RTVEC_ELT (v
, 1) = elt
;
23043 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
23044 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
23047 /* Return an RTX representing where to find the function value of a
23048 function returning MODE. */
23050 rs6000_complex_function_value (machine_mode mode
)
23052 unsigned int regno
;
23054 machine_mode inner
= GET_MODE_INNER (mode
);
23055 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23057 if (TARGET_FLOAT128_TYPE
23059 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23060 regno
= ALTIVEC_ARG_RETURN
;
23062 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23063 regno
= FP_ARG_RETURN
;
23067 regno
= GP_ARG_RETURN
;
23069 /* 32-bit is OK since it'll go in r3/r4. */
23070 if (TARGET_32BIT
&& inner_bytes
>= 4)
23071 return gen_rtx_REG (mode
, regno
);
23074 if (inner_bytes
>= 8)
23075 return gen_rtx_REG (mode
, regno
);
23077 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23079 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23080 GEN_INT (inner_bytes
));
23081 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
23084 /* Return an rtx describing a return value of MODE as a PARALLEL
23085 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23086 stride REG_STRIDE. */
23089 rs6000_parallel_return (machine_mode mode
,
23090 int n_elts
, machine_mode elt_mode
,
23091 unsigned int regno
, unsigned int reg_stride
)
23093 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
23096 for (i
= 0; i
< n_elts
; i
++)
23098 rtx r
= gen_rtx_REG (elt_mode
, regno
);
23099 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
23100 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
23101 regno
+= reg_stride
;
23107 /* Target hook for TARGET_FUNCTION_VALUE.
23109 An integer value is in r3 and a floating-point value is in fp1,
23110 unless -msoft-float. */
23113 rs6000_function_value (const_tree valtype
,
23114 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
23115 bool outgoing ATTRIBUTE_UNUSED
)
23118 unsigned int regno
;
23119 machine_mode elt_mode
;
23122 /* Special handling for structs in darwin64. */
23124 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
23126 CUMULATIVE_ARGS valcum
;
23130 valcum
.fregno
= FP_ARG_MIN_REG
;
23131 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
23132 /* Do a trial code generation as if this were going to be passed as
23133 an argument; if any part goes in memory, we return NULL. */
23134 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
23137 /* Otherwise fall through to standard ABI rules. */
23140 mode
= TYPE_MODE (valtype
);
23142 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23143 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
23145 int first_reg
, n_regs
;
23147 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
23149 /* _Decimal128 must use even/odd register pairs. */
23150 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23151 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
23155 first_reg
= ALTIVEC_ARG_RETURN
;
23159 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
23162 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
23163 if (TARGET_32BIT
&& TARGET_POWERPC64
)
23172 int count
= GET_MODE_SIZE (mode
) / 4;
23173 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
23176 if ((INTEGRAL_TYPE_P (valtype
)
23177 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
23178 || POINTER_TYPE_P (valtype
))
23179 mode
= TARGET_32BIT
? SImode
: DImode
;
23181 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23182 /* _Decimal128 must use an even/odd register pair. */
23183 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23184 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
23185 && !FLOAT128_VECTOR_P (mode
))
23186 regno
= FP_ARG_RETURN
;
23187 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
23188 && targetm
.calls
.split_complex_arg
)
23189 return rs6000_complex_function_value (mode
);
23190 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23191 return register is used in both cases, and we won't see V2DImode/V2DFmode
23192 for pure altivec, combine the two cases. */
23193 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| VECTOR_ALIGNMENT_P (mode
))
23194 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
23195 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
23196 regno
= ALTIVEC_ARG_RETURN
;
23198 regno
= GP_ARG_RETURN
;
23200 return gen_rtx_REG (mode
, regno
);
23203 /* Define how to find the value returned by a library function
23204 assuming the value has mode MODE. */
23206 rs6000_libcall_value (machine_mode mode
)
23208 unsigned int regno
;
23210 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
23211 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
23212 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
23214 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23215 /* _Decimal128 must use an even/odd register pair. */
23216 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23217 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && TARGET_HARD_FLOAT
)
23218 regno
= FP_ARG_RETURN
;
23219 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23220 return register is used in both cases, and we won't see V2DImode/V2DFmode
23221 for pure altivec, combine the two cases. */
23222 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
23223 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
23224 regno
= ALTIVEC_ARG_RETURN
;
23225 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
23226 return rs6000_complex_function_value (mode
);
23228 regno
= GP_ARG_RETURN
;
23230 return gen_rtx_REG (mode
, regno
);
23233 /* Compute register pressure classes. We implement the target hook to avoid
23234 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23235 lead to incorrect estimates of number of available registers and therefor
23236 increased register pressure/spill. */
23238 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
23243 pressure_classes
[n
++] = GENERAL_REGS
;
23245 pressure_classes
[n
++] = VSX_REGS
;
23248 if (TARGET_ALTIVEC
)
23249 pressure_classes
[n
++] = ALTIVEC_REGS
;
23250 if (TARGET_HARD_FLOAT
)
23251 pressure_classes
[n
++] = FLOAT_REGS
;
23253 pressure_classes
[n
++] = CR_REGS
;
23254 pressure_classes
[n
++] = SPECIAL_REGS
;
23259 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23260 Frame pointer elimination is automatically handled.
23262 For the RS/6000, if frame pointer elimination is being done, we would like
23263 to convert ap into fp, not sp.
23265 We need r30 if -mminimal-toc was specified, and there are constant pool
23269 rs6000_can_eliminate (const int from
, const int to
)
23271 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
23272 ? ! frame_pointer_needed
23273 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
23274 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC_OR_PCREL
23275 || constant_pool_empty_p ()
23279 /* Define the offset between two registers, FROM to be eliminated and its
23280 replacement TO, at the start of a routine. */
23282 rs6000_initial_elimination_offset (int from
, int to
)
23284 rs6000_stack_t
*info
= rs6000_stack_info ();
23285 HOST_WIDE_INT offset
;
23287 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23288 offset
= info
->push_p
? 0 : -info
->total_size
;
23289 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23291 offset
= info
->push_p
? 0 : -info
->total_size
;
23292 if (FRAME_GROWS_DOWNWARD
)
23293 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
23295 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
23296 offset
= FRAME_GROWS_DOWNWARD
23297 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
23299 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
23300 offset
= info
->total_size
;
23301 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23302 offset
= info
->push_p
? info
->total_size
: 0;
23303 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
23306 gcc_unreachable ();
23311 /* Fill in sizes of registers used by unwinder. */
23314 rs6000_init_dwarf_reg_sizes_extra (tree address
)
23316 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
23319 machine_mode mode
= TYPE_MODE (char_type_node
);
23320 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
23321 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
23322 rtx value
= gen_int_mode (16, mode
);
23324 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23325 The unwinder still needs to know the size of Altivec registers. */
23327 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
23329 int column
= DWARF_REG_TO_UNWIND_COLUMN
23330 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
23331 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
23333 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
23338 /* Map internal gcc register numbers to debug format register numbers.
23339 FORMAT specifies the type of debug register number to use:
23340 0 -- debug information, except for frame-related sections
23341 1 -- DWARF .debug_frame section
23342 2 -- DWARF .eh_frame section */
23345 rs6000_dbx_register_number (unsigned int regno
, unsigned int format
)
23347 /* On some platforms, we use the standard DWARF register
23348 numbering for .debug_info and .debug_frame. */
23349 if ((format
== 0 && write_symbols
== DWARF2_DEBUG
) || format
== 1)
23351 #ifdef RS6000_USE_DWARF_NUMBERING
23354 if (FP_REGNO_P (regno
))
23355 return regno
- FIRST_FPR_REGNO
+ 32;
23356 if (ALTIVEC_REGNO_P (regno
))
23357 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
23358 if (regno
== LR_REGNO
)
23360 if (regno
== CTR_REGNO
)
23362 if (regno
== CA_REGNO
)
23363 return 101; /* XER */
23364 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23365 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23366 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23367 to the DWARF reg for CR. */
23368 if (format
== 1 && regno
== CR2_REGNO
)
23370 if (CR_REGNO_P (regno
))
23371 return regno
- CR0_REGNO
+ 86;
23372 if (regno
== VRSAVE_REGNO
)
23374 if (regno
== VSCR_REGNO
)
23377 /* These do not make much sense. */
23378 if (regno
== FRAME_POINTER_REGNUM
)
23380 if (regno
== ARG_POINTER_REGNUM
)
23385 gcc_unreachable ();
23389 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23390 information, and also for .eh_frame. */
23391 /* Translate the regnos to their numbers in GCC 7 (and before). */
23394 if (FP_REGNO_P (regno
))
23395 return regno
- FIRST_FPR_REGNO
+ 32;
23396 if (ALTIVEC_REGNO_P (regno
))
23397 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
23398 if (regno
== LR_REGNO
)
23400 if (regno
== CTR_REGNO
)
23402 if (regno
== CA_REGNO
)
23403 return 76; /* XER */
23404 if (CR_REGNO_P (regno
))
23405 return regno
- CR0_REGNO
+ 68;
23406 if (regno
== VRSAVE_REGNO
)
23408 if (regno
== VSCR_REGNO
)
23411 if (regno
== FRAME_POINTER_REGNUM
)
23413 if (regno
== ARG_POINTER_REGNUM
)
23418 gcc_unreachable ();
23421 /* target hook eh_return_filter_mode */
23422 static scalar_int_mode
23423 rs6000_eh_return_filter_mode (void)
23425 return TARGET_32BIT
? SImode
: word_mode
;
23428 /* Target hook for translate_mode_attribute. */
23429 static machine_mode
23430 rs6000_translate_mode_attribute (machine_mode mode
)
23432 if ((FLOAT128_IEEE_P (mode
)
23433 && ieee128_float_type_node
== long_double_type_node
)
23434 || (FLOAT128_IBM_P (mode
)
23435 && ibm128_float_type_node
== long_double_type_node
))
23436 return COMPLEX_MODE_P (mode
) ? E_TCmode
: E_TFmode
;
23440 /* Target hook for scalar_mode_supported_p. */
23442 rs6000_scalar_mode_supported_p (scalar_mode mode
)
23444 /* -m32 does not support TImode. This is the default, from
23445 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23446 same ABI as for -m32. But default_scalar_mode_supported_p allows
23447 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23448 for -mpowerpc64. */
23449 if (TARGET_32BIT
&& mode
== TImode
)
23452 if (DECIMAL_FLOAT_MODE_P (mode
))
23453 return default_decimal_float_supported_p ();
23454 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
23457 return default_scalar_mode_supported_p (mode
);
23460 /* Target hook for vector_mode_supported_p. */
23462 rs6000_vector_mode_supported_p (machine_mode mode
)
23464 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23465 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23467 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
23474 /* Target hook for floatn_mode. */
23475 static opt_scalar_float_mode
23476 rs6000_floatn_mode (int n
, bool extended
)
23486 if (TARGET_FLOAT128_TYPE
)
23487 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
23489 return opt_scalar_float_mode ();
23492 return opt_scalar_float_mode ();
23495 /* Those are the only valid _FloatNx types. */
23496 gcc_unreachable ();
23510 if (TARGET_FLOAT128_TYPE
)
23511 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
23513 return opt_scalar_float_mode ();
23516 return opt_scalar_float_mode ();
23522 /* Target hook for c_mode_for_suffix. */
23523 static machine_mode
23524 rs6000_c_mode_for_suffix (char suffix
)
23526 if (TARGET_FLOAT128_TYPE
)
23528 if (suffix
== 'q' || suffix
== 'Q')
23529 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
23531 /* At the moment, we are not defining a suffix for IBM extended double.
23532 If/when the default for -mabi=ieeelongdouble is changed, and we want
23533 to support __ibm128 constants in legacy library code, we may need to
23534 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
23535 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23536 __float80 constants. */
23542 /* Target hook for invalid_arg_for_unprototyped_fn. */
23543 static const char *
23544 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
23546 return (!rs6000_darwin64_abi
23548 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
23549 && (funcdecl
== NULL_TREE
23550 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
23551 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
23552 ? N_("AltiVec argument passed to unprototyped function")
23556 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
23557 setup by using __stack_chk_fail_local hidden function instead of
23558 calling __stack_chk_fail directly. Otherwise it is better to call
23559 __stack_chk_fail directly. */
23561 static tree ATTRIBUTE_UNUSED
23562 rs6000_stack_protect_fail (void)
23564 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
23565 ? default_hidden_stack_protect_fail ()
23566 : default_external_stack_protect_fail ();
23569 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23572 static unsigned HOST_WIDE_INT
23573 rs6000_asan_shadow_offset (void)
23575 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
23579 /* Mask options that we want to support inside of attribute((target)) and
23580 #pragma GCC target operations. Note, we do not include things like
23581 64/32-bit, endianness, hard/soft floating point, etc. that would have
23582 different calling sequences. */
23584 struct rs6000_opt_mask
{
23585 const char *name
; /* option name */
23586 HOST_WIDE_INT mask
; /* mask to set */
23587 bool invert
; /* invert sense of mask */
23588 bool valid_target
; /* option is a target option */
23591 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
23593 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
23594 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
23596 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
23598 { "cmpb", OPTION_MASK_CMPB
, false, true },
23599 { "crypto", OPTION_MASK_CRYPTO
, false, true },
23600 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
23601 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
23602 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
23604 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
23605 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
23606 { "fprnd", OPTION_MASK_FPRND
, false, true },
23607 { "power10", OPTION_MASK_POWER10
, false, true },
23608 { "hard-dfp", OPTION_MASK_DFP
, false, true },
23609 { "htm", OPTION_MASK_HTM
, false, true },
23610 { "isel", OPTION_MASK_ISEL
, false, true },
23611 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
23612 { "mfpgpr", 0, false, true },
23613 { "mma", OPTION_MASK_MMA
, false, true },
23614 { "modulo", OPTION_MASK_MODULO
, false, true },
23615 { "mulhw", OPTION_MASK_MULHW
, false, true },
23616 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
23617 { "pcrel", OPTION_MASK_PCREL
, false, true },
23618 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
23619 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
23620 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
23621 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
23622 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
23623 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
23624 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
23625 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
23626 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
23627 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
23628 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
23629 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
23630 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
23631 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
23632 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
23633 { "string", 0, false, true },
23634 { "update", OPTION_MASK_NO_UPDATE
, true , true },
23635 { "vsx", OPTION_MASK_VSX
, false, true },
23636 #ifdef OPTION_MASK_64BIT
23638 { "aix64", OPTION_MASK_64BIT
, false, false },
23639 { "aix32", OPTION_MASK_64BIT
, true, false },
23641 { "64", OPTION_MASK_64BIT
, false, false },
23642 { "32", OPTION_MASK_64BIT
, true, false },
23645 #ifdef OPTION_MASK_EABI
23646 { "eabi", OPTION_MASK_EABI
, false, false },
23648 #ifdef OPTION_MASK_LITTLE_ENDIAN
23649 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
23650 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
23652 #ifdef OPTION_MASK_RELOCATABLE
23653 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
23655 #ifdef OPTION_MASK_STRICT_ALIGN
23656 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
23658 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
23659 { "string", 0, false, false },
23662 /* Builtin mask mapping for printing the flags. */
23663 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
23665 { "altivec", RS6000_BTM_ALTIVEC
, false, false },
23666 { "vsx", RS6000_BTM_VSX
, false, false },
23667 { "fre", RS6000_BTM_FRE
, false, false },
23668 { "fres", RS6000_BTM_FRES
, false, false },
23669 { "frsqrte", RS6000_BTM_FRSQRTE
, false, false },
23670 { "frsqrtes", RS6000_BTM_FRSQRTES
, false, false },
23671 { "popcntd", RS6000_BTM_POPCNTD
, false, false },
23672 { "cell", RS6000_BTM_CELL
, false, false },
23673 { "power8-vector", RS6000_BTM_P8_VECTOR
, false, false },
23674 { "power9-vector", RS6000_BTM_P9_VECTOR
, false, false },
23675 { "power9-misc", RS6000_BTM_P9_MISC
, false, false },
23676 { "crypto", RS6000_BTM_CRYPTO
, false, false },
23677 { "htm", RS6000_BTM_HTM
, false, false },
23678 { "hard-dfp", RS6000_BTM_DFP
, false, false },
23679 { "hard-float", RS6000_BTM_HARD_FLOAT
, false, false },
23680 { "long-double-128", RS6000_BTM_LDBL128
, false, false },
23681 { "powerpc64", RS6000_BTM_POWERPC64
, false, false },
23682 { "float128", RS6000_BTM_FLOAT128
, false, false },
23683 { "float128-hw", RS6000_BTM_FLOAT128_HW
,false, false },
23684 { "mma", RS6000_BTM_MMA
, false, false },
23685 { "power10", RS6000_BTM_P10
, false, false },
23688 /* Option variables that we want to support inside attribute((target)) and
23689 #pragma GCC target operations. */
23691 struct rs6000_opt_var
{
23692 const char *name
; /* option name */
23693 size_t global_offset
; /* offset of the option in global_options. */
23694 size_t target_offset
; /* offset of the option in target options. */
23697 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
23700 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
23701 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
23702 { "avoid-indexed-addresses",
23703 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
23704 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
23706 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
23707 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
23708 { "optimize-swaps",
23709 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
23710 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
23711 { "allow-movmisalign",
23712 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
23713 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
23715 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
23716 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
23718 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
23719 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
23720 { "align-branch-targets",
23721 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
23722 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
23724 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
23725 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
23727 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
23728 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
23729 { "speculate-indirect-jumps",
23730 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
23731 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
23734 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23735 parsing. Return true if there were no errors. */
23738 rs6000_inner_target_options (tree args
, bool attr_p
)
23742 if (args
== NULL_TREE
)
23745 else if (TREE_CODE (args
) == STRING_CST
)
23747 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
23750 while ((q
= strtok (p
, ",")) != NULL
)
23752 bool error_p
= false;
23753 bool not_valid_p
= false;
23754 const char *cpu_opt
= NULL
;
23757 if (strncmp (q
, "cpu=", 4) == 0)
23759 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
23760 if (cpu_index
>= 0)
23761 rs6000_cpu_index
= cpu_index
;
23768 else if (strncmp (q
, "tune=", 5) == 0)
23770 int tune_index
= rs6000_cpu_name_lookup (q
+5);
23771 if (tune_index
>= 0)
23772 rs6000_tune_index
= tune_index
;
23782 bool invert
= false;
23786 if (strncmp (r
, "no-", 3) == 0)
23792 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
23793 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
23795 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
23797 if (!rs6000_opt_masks
[i
].valid_target
)
23798 not_valid_p
= true;
23802 rs6000_isa_flags_explicit
|= mask
;
23804 /* VSX needs altivec, so -mvsx automagically sets
23805 altivec and disables -mavoid-indexed-addresses. */
23808 if (mask
== OPTION_MASK_VSX
)
23810 mask
|= OPTION_MASK_ALTIVEC
;
23811 TARGET_AVOID_XFORM
= 0;
23815 if (rs6000_opt_masks
[i
].invert
)
23819 rs6000_isa_flags
&= ~mask
;
23821 rs6000_isa_flags
|= mask
;
23826 if (error_p
&& !not_valid_p
)
23828 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
23829 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
23831 size_t j
= rs6000_opt_vars
[i
].global_offset
;
23832 *((int *) ((char *)&global_options
+ j
)) = !invert
;
23834 not_valid_p
= false;
23842 const char *eprefix
, *esuffix
;
23847 eprefix
= "__attribute__((__target__(";
23852 eprefix
= "#pragma GCC target ";
23857 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
23859 else if (not_valid_p
)
23860 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
23862 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
23867 else if (TREE_CODE (args
) == TREE_LIST
)
23871 tree value
= TREE_VALUE (args
);
23874 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
23878 args
= TREE_CHAIN (args
);
23880 while (args
!= NULL_TREE
);
23885 error ("attribute %<target%> argument not a string");
23892 /* Print out the target options as a list for -mdebug=target. */
23895 rs6000_debug_target_options (tree args
, const char *prefix
)
23897 if (args
== NULL_TREE
)
23898 fprintf (stderr
, "%s<NULL>", prefix
);
23900 else if (TREE_CODE (args
) == STRING_CST
)
23902 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
23905 while ((q
= strtok (p
, ",")) != NULL
)
23908 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
23913 else if (TREE_CODE (args
) == TREE_LIST
)
23917 tree value
= TREE_VALUE (args
);
23920 rs6000_debug_target_options (value
, prefix
);
23923 args
= TREE_CHAIN (args
);
23925 while (args
!= NULL_TREE
);
23929 gcc_unreachable ();
23935 /* Hook to validate attribute((target("..."))). */
23938 rs6000_valid_attribute_p (tree fndecl
,
23939 tree
ARG_UNUSED (name
),
23943 struct cl_target_option cur_target
;
23946 tree new_target
, new_optimize
;
23947 tree func_optimize
;
23949 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
23951 if (TARGET_DEBUG_TARGET
)
23953 tree tname
= DECL_NAME (fndecl
);
23954 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
23956 fprintf (stderr
, "function: %.*s\n",
23957 (int) IDENTIFIER_LENGTH (tname
),
23958 IDENTIFIER_POINTER (tname
));
23960 fprintf (stderr
, "function: unknown\n");
23962 fprintf (stderr
, "args:");
23963 rs6000_debug_target_options (args
, " ");
23964 fprintf (stderr
, "\n");
23967 fprintf (stderr
, "flags: 0x%x\n", flags
);
23969 fprintf (stderr
, "--------------------\n");
23972 /* attribute((target("default"))) does nothing, beyond
23973 affecting multi-versioning. */
23974 if (TREE_VALUE (args
)
23975 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
23976 && TREE_CHAIN (args
) == NULL_TREE
23977 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
23980 old_optimize
= build_optimization_node (&global_options
,
23981 &global_options_set
);
23982 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
23984 /* If the function changed the optimization levels as well as setting target
23985 options, start with the optimizations specified. */
23986 if (func_optimize
&& func_optimize
!= old_optimize
)
23987 cl_optimization_restore (&global_options
, &global_options_set
,
23988 TREE_OPTIMIZATION (func_optimize
));
23990 /* The target attributes may also change some optimization flags, so update
23991 the optimization options if necessary. */
23992 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
23993 rs6000_cpu_index
= rs6000_tune_index
= -1;
23994 ret
= rs6000_inner_target_options (args
, true);
23996 /* Set up any additional state. */
23999 ret
= rs6000_option_override_internal (false);
24000 new_target
= build_target_option_node (&global_options
,
24001 &global_options_set
);
24006 new_optimize
= build_optimization_node (&global_options
,
24007 &global_options_set
);
24014 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24016 if (old_optimize
!= new_optimize
)
24017 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24020 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24022 if (old_optimize
!= new_optimize
)
24023 cl_optimization_restore (&global_options
, &global_options_set
,
24024 TREE_OPTIMIZATION (old_optimize
));
24030 /* Hook to validate the current #pragma GCC target and set the state, and
24031 update the macros based on what was changed. If ARGS is NULL, then
24032 POP_TARGET is used to reset the options. */
24035 rs6000_pragma_target_parse (tree args
, tree pop_target
)
24037 tree prev_tree
= build_target_option_node (&global_options
,
24038 &global_options_set
);
24040 struct cl_target_option
*prev_opt
, *cur_opt
;
24041 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
24042 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
24044 if (TARGET_DEBUG_TARGET
)
24046 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
24047 fprintf (stderr
, "args:");
24048 rs6000_debug_target_options (args
, " ");
24049 fprintf (stderr
, "\n");
24053 fprintf (stderr
, "pop_target:\n");
24054 debug_tree (pop_target
);
24057 fprintf (stderr
, "pop_target: <NULL>\n");
24059 fprintf (stderr
, "--------------------\n");
24064 cur_tree
= ((pop_target
)
24066 : target_option_default_node
);
24067 cl_target_option_restore (&global_options
, &global_options_set
,
24068 TREE_TARGET_OPTION (cur_tree
));
24072 rs6000_cpu_index
= rs6000_tune_index
= -1;
24073 if (!rs6000_inner_target_options (args
, false)
24074 || !rs6000_option_override_internal (false)
24075 || (cur_tree
= build_target_option_node (&global_options
,
24076 &global_options_set
))
24079 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
24080 fprintf (stderr
, "invalid pragma\n");
24086 target_option_current_node
= cur_tree
;
24087 rs6000_activate_target_options (target_option_current_node
);
24089 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24090 change the macros that are defined. */
24091 if (rs6000_target_modify_macros_ptr
)
24093 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
24094 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
24095 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
24097 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
24098 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
24099 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
24101 diff_bumask
= (prev_bumask
^ cur_bumask
);
24102 diff_flags
= (prev_flags
^ cur_flags
);
24104 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
24106 /* Delete old macros. */
24107 rs6000_target_modify_macros_ptr (false,
24108 prev_flags
& diff_flags
,
24109 prev_bumask
& diff_bumask
);
24111 /* Define new macros. */
24112 rs6000_target_modify_macros_ptr (true,
24113 cur_flags
& diff_flags
,
24114 cur_bumask
& diff_bumask
);
24122 /* Remember the last target of rs6000_set_current_function. */
24123 static GTY(()) tree rs6000_previous_fndecl
;
24125 /* Restore target's globals from NEW_TREE and invalidate the
24126 rs6000_previous_fndecl cache. */
24129 rs6000_activate_target_options (tree new_tree
)
24131 cl_target_option_restore (&global_options
, &global_options_set
,
24132 TREE_TARGET_OPTION (new_tree
));
24133 if (TREE_TARGET_GLOBALS (new_tree
))
24134 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
24135 else if (new_tree
== target_option_default_node
)
24136 restore_target_globals (&default_target_globals
);
24138 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
24139 rs6000_previous_fndecl
= NULL_TREE
;
24142 /* Establish appropriate back-end context for processing the function
24143 FNDECL. The argument might be NULL to indicate processing at top
24144 level, outside of any function scope. */
24146 rs6000_set_current_function (tree fndecl
)
24148 if (TARGET_DEBUG_TARGET
)
24150 fprintf (stderr
, "\n==================== rs6000_set_current_function");
24153 fprintf (stderr
, ", fndecl %s (%p)",
24154 (DECL_NAME (fndecl
)
24155 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
24156 : "<unknown>"), (void *)fndecl
);
24158 if (rs6000_previous_fndecl
)
24159 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
24161 fprintf (stderr
, "\n");
24164 /* Only change the context if the function changes. This hook is called
24165 several times in the course of compiling a function, and we don't want to
24166 slow things down too much or call target_reinit when it isn't safe. */
24167 if (fndecl
== rs6000_previous_fndecl
)
24171 if (rs6000_previous_fndecl
== NULL_TREE
)
24172 old_tree
= target_option_current_node
;
24173 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
24174 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
24176 old_tree
= target_option_default_node
;
24179 if (fndecl
== NULL_TREE
)
24181 if (old_tree
!= target_option_current_node
)
24182 new_tree
= target_option_current_node
;
24184 new_tree
= NULL_TREE
;
24188 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24189 if (new_tree
== NULL_TREE
)
24190 new_tree
= target_option_default_node
;
24193 if (TARGET_DEBUG_TARGET
)
24197 fprintf (stderr
, "\nnew fndecl target specific options:\n");
24198 debug_tree (new_tree
);
24203 fprintf (stderr
, "\nold fndecl target specific options:\n");
24204 debug_tree (old_tree
);
24207 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
24208 fprintf (stderr
, "--------------------\n");
24211 if (new_tree
&& old_tree
!= new_tree
)
24212 rs6000_activate_target_options (new_tree
);
24215 rs6000_previous_fndecl
= fndecl
;
24219 /* Save the current options */
24222 rs6000_function_specific_save (struct cl_target_option
*ptr
,
24223 struct gcc_options
*opts
,
24224 struct gcc_options */
* opts_set */
)
24226 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
24227 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
24230 /* Restore the current options */
24233 rs6000_function_specific_restore (struct gcc_options
*opts
,
24234 struct gcc_options */
* opts_set */
,
24235 struct cl_target_option
*ptr
)
24238 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
24239 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
24240 (void) rs6000_option_override_internal (false);
24243 /* Print the current options */
24246 rs6000_function_specific_print (FILE *file
, int indent
,
24247 struct cl_target_option
*ptr
)
24249 rs6000_print_isa_options (file
, indent
, "Isa options set",
24250 ptr
->x_rs6000_isa_flags
);
24252 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
24253 ptr
->x_rs6000_isa_flags_explicit
);
24256 /* Helper function to print the current isa or misc options on a line. */
24259 rs6000_print_options_internal (FILE *file
,
24261 const char *string
,
24262 HOST_WIDE_INT flags
,
24263 const char *prefix
,
24264 const struct rs6000_opt_mask
*opts
,
24265 size_t num_elements
)
24268 size_t start_column
= 0;
24270 size_t max_column
= 120;
24271 size_t prefix_len
= strlen (prefix
);
24272 size_t comma_len
= 0;
24273 const char *comma
= "";
24276 start_column
+= fprintf (file
, "%*s", indent
, "");
24280 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
24284 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
24286 /* Print the various mask options. */
24287 cur_column
= start_column
;
24288 for (i
= 0; i
< num_elements
; i
++)
24290 bool invert
= opts
[i
].invert
;
24291 const char *name
= opts
[i
].name
;
24292 const char *no_str
= "";
24293 HOST_WIDE_INT mask
= opts
[i
].mask
;
24294 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
24298 if ((flags
& mask
) == 0)
24301 len
+= strlen ("no-");
24309 if ((flags
& mask
) != 0)
24312 len
+= strlen ("no-");
24319 if (cur_column
> max_column
)
24321 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
24322 cur_column
= start_column
+ len
;
24326 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
24328 comma_len
= strlen (", ");
24331 fputs ("\n", file
);
24334 /* Helper function to print the current isa options on a line. */
24337 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
24338 HOST_WIDE_INT flags
)
24340 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
24341 &rs6000_opt_masks
[0],
24342 ARRAY_SIZE (rs6000_opt_masks
));
24346 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
24347 HOST_WIDE_INT flags
)
24349 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
24350 &rs6000_builtin_mask_names
[0],
24351 ARRAY_SIZE (rs6000_builtin_mask_names
));
24354 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24355 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24356 -mupper-regs-df, etc.).
24358 If the user used -mno-power8-vector, we need to turn off all of the implicit
24359 ISA 2.07 and 3.0 options that relate to the vector unit.
24361 If the user used -mno-power9-vector, we need to turn off all of the implicit
24362 ISA 3.0 options that relate to the vector unit.
24364 This function does not handle explicit options such as the user specifying
24365 -mdirect-move. These are handled in rs6000_option_override_internal, and
24366 the appropriate error is given if needed.
24368 We return a mask of all of the implicit options that should not be enabled
24371 static HOST_WIDE_INT
24372 rs6000_disable_incompatible_switches (void)
24374 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
24377 static const struct {
24378 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
24379 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
24380 const char *const name
; /* name of the switch. */
24382 { OPTION_MASK_POWER10
, OTHER_POWER10_MASKS
, "power10" },
24383 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
24384 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
24385 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
24386 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
24389 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
24391 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
24393 if ((rs6000_isa_flags
& no_flag
) == 0
24394 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
24396 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
24397 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
24403 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
24404 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
24406 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
24407 error ("%<-mno-%s%> turns off %<-m%s%>",
24409 rs6000_opt_masks
[j
].name
);
24412 gcc_assert (!set_flags
);
24415 rs6000_isa_flags
&= ~dep_flags
;
24416 ignore_masks
|= no_flag
| dep_flags
;
24420 return ignore_masks
;
24424 /* Helper function for printing the function name when debugging. */
24426 static const char *
24427 get_decl_name (tree fn
)
24434 name
= DECL_NAME (fn
);
24436 return "<no-name>";
24438 return IDENTIFIER_POINTER (name
);
24441 /* Return the clone id of the target we are compiling code for in a target
24442 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24443 the priority list for the target clones (ordered from lowest to
24447 rs6000_clone_priority (tree fndecl
)
24449 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24450 HOST_WIDE_INT isa_masks
;
24451 int ret
= CLONE_DEFAULT
;
24452 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
24453 const char *attrs_str
= NULL
;
24455 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
24456 attrs_str
= TREE_STRING_POINTER (attrs
);
24458 /* Return priority zero for default function. Return the ISA needed for the
24459 function if it is not the default. */
24460 if (strcmp (attrs_str
, "default") != 0)
24462 if (fn_opts
== NULL_TREE
)
24463 fn_opts
= target_option_default_node
;
24465 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
24466 isa_masks
= rs6000_isa_flags
;
24468 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
24470 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
24471 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
24475 if (TARGET_DEBUG_TARGET
)
24476 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
24477 get_decl_name (fndecl
), ret
);
24482 /* This compares the priority of target features in function DECL1 and DECL2.
24483 It returns positive value if DECL1 is higher priority, negative value if
24484 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24485 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24488 rs6000_compare_version_priority (tree decl1
, tree decl2
)
24490 int priority1
= rs6000_clone_priority (decl1
);
24491 int priority2
= rs6000_clone_priority (decl2
);
24492 int ret
= priority1
- priority2
;
24494 if (TARGET_DEBUG_TARGET
)
24495 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
24496 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
24501 /* Make a dispatcher declaration for the multi-versioned function DECL.
24502 Calls to DECL function will be replaced with calls to the dispatcher
24503 by the front-end. Returns the decl of the dispatcher function. */
24506 rs6000_get_function_versions_dispatcher (void *decl
)
24508 tree fn
= (tree
) decl
;
24509 struct cgraph_node
*node
= NULL
;
24510 struct cgraph_node
*default_node
= NULL
;
24511 struct cgraph_function_version_info
*node_v
= NULL
;
24512 struct cgraph_function_version_info
*first_v
= NULL
;
24514 tree dispatch_decl
= NULL
;
24516 struct cgraph_function_version_info
*default_version_info
= NULL
;
24517 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
24519 if (TARGET_DEBUG_TARGET
)
24520 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
24521 get_decl_name (fn
));
24523 node
= cgraph_node::get (fn
);
24524 gcc_assert (node
!= NULL
);
24526 node_v
= node
->function_version ();
24527 gcc_assert (node_v
!= NULL
);
24529 if (node_v
->dispatcher_resolver
!= NULL
)
24530 return node_v
->dispatcher_resolver
;
24532 /* Find the default version and make it the first node. */
24534 /* Go to the beginning of the chain. */
24535 while (first_v
->prev
!= NULL
)
24536 first_v
= first_v
->prev
;
24538 default_version_info
= first_v
;
24539 while (default_version_info
!= NULL
)
24541 const tree decl2
= default_version_info
->this_node
->decl
;
24542 if (is_function_default_version (decl2
))
24544 default_version_info
= default_version_info
->next
;
24547 /* If there is no default node, just return NULL. */
24548 if (default_version_info
== NULL
)
24551 /* Make default info the first node. */
24552 if (first_v
!= default_version_info
)
24554 default_version_info
->prev
->next
= default_version_info
->next
;
24555 if (default_version_info
->next
)
24556 default_version_info
->next
->prev
= default_version_info
->prev
;
24557 first_v
->prev
= default_version_info
;
24558 default_version_info
->next
= first_v
;
24559 default_version_info
->prev
= NULL
;
24562 default_node
= default_version_info
->this_node
;
24564 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24565 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
24566 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24567 "exports hardware capability bits");
24570 if (targetm
.has_ifunc_p ())
24572 struct cgraph_function_version_info
*it_v
= NULL
;
24573 struct cgraph_node
*dispatcher_node
= NULL
;
24574 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
24576 /* Right now, the dispatching is done via ifunc. */
24577 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
24579 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
24580 gcc_assert (dispatcher_node
!= NULL
);
24581 dispatcher_node
->dispatcher_function
= 1;
24582 dispatcher_version_info
24583 = dispatcher_node
->insert_new_function_version ();
24584 dispatcher_version_info
->next
= default_version_info
;
24585 dispatcher_node
->definition
= 1;
24587 /* Set the dispatcher for all the versions. */
24588 it_v
= default_version_info
;
24589 while (it_v
!= NULL
)
24591 it_v
->dispatcher_resolver
= dispatch_decl
;
24597 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
24598 "multiversioning needs ifunc which is not supported "
24603 return dispatch_decl
;
24606 /* Make the resolver function decl to dispatch the versions of a multi-
24607 versioned function, DEFAULT_DECL. Create an empty basic block in the
24608 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24612 make_resolver_func (const tree default_decl
,
24613 const tree dispatch_decl
,
24614 basic_block
*empty_bb
)
24616 /* Make the resolver function static. The resolver function returns
24618 tree decl_name
= clone_function_name (default_decl
, "resolver");
24619 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
24620 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
24621 tree decl
= build_fn_decl (resolver_name
, type
);
24622 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
24624 DECL_NAME (decl
) = decl_name
;
24625 TREE_USED (decl
) = 1;
24626 DECL_ARTIFICIAL (decl
) = 1;
24627 DECL_IGNORED_P (decl
) = 0;
24628 TREE_PUBLIC (decl
) = 0;
24629 DECL_UNINLINABLE (decl
) = 1;
24631 /* Resolver is not external, body is generated. */
24632 DECL_EXTERNAL (decl
) = 0;
24633 DECL_EXTERNAL (dispatch_decl
) = 0;
24635 DECL_CONTEXT (decl
) = NULL_TREE
;
24636 DECL_INITIAL (decl
) = make_node (BLOCK
);
24637 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
24639 if (DECL_COMDAT_GROUP (default_decl
)
24640 || TREE_PUBLIC (default_decl
))
24642 /* In this case, each translation unit with a call to this
24643 versioned function will put out a resolver. Ensure it
24644 is comdat to keep just one copy. */
24645 DECL_COMDAT (decl
) = 1;
24646 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
24649 TREE_PUBLIC (dispatch_decl
) = 0;
24651 /* Build result decl and add to function_decl. */
24652 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
24653 DECL_CONTEXT (t
) = decl
;
24654 DECL_ARTIFICIAL (t
) = 1;
24655 DECL_IGNORED_P (t
) = 1;
24656 DECL_RESULT (decl
) = t
;
24658 gimplify_function_tree (decl
);
24659 push_cfun (DECL_STRUCT_FUNCTION (decl
));
24660 *empty_bb
= init_lowered_empty_function (decl
, false,
24661 profile_count::uninitialized ());
24663 cgraph_node::add_new_function (decl
, true);
24664 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
24668 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24669 DECL_ATTRIBUTES (dispatch_decl
)
24670 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
24672 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
24677 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24678 return a pointer to VERSION_DECL if we are running on a machine that
24679 supports the index CLONE_ISA hardware architecture bits. This function will
24680 be called during version dispatch to decide which function version to
24681 execute. It returns the basic block at the end, to which more conditions
24685 add_condition_to_bb (tree function_decl
, tree version_decl
,
24686 int clone_isa
, basic_block new_bb
)
24688 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
24690 gcc_assert (new_bb
!= NULL
);
24691 gimple_seq gseq
= bb_seq (new_bb
);
24694 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
24695 build_fold_addr_expr (version_decl
));
24696 tree result_var
= create_tmp_var (ptr_type_node
);
24697 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
24698 gimple
*return_stmt
= gimple_build_return (result_var
);
24700 if (clone_isa
== CLONE_DEFAULT
)
24702 gimple_seq_add_stmt (&gseq
, convert_stmt
);
24703 gimple_seq_add_stmt (&gseq
, return_stmt
);
24704 set_bb_seq (new_bb
, gseq
);
24705 gimple_set_bb (convert_stmt
, new_bb
);
24706 gimple_set_bb (return_stmt
, new_bb
);
24711 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
24712 tree cond_var
= create_tmp_var (bool_int_type_node
);
24713 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BUILTIN_CPU_SUPPORTS
];
24714 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
24715 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
24716 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
24717 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
24719 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
24720 gimple_set_bb (call_cond_stmt
, new_bb
);
24721 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
24723 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
24724 NULL_TREE
, NULL_TREE
);
24725 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
24726 gimple_set_bb (if_else_stmt
, new_bb
);
24727 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
24729 gimple_seq_add_stmt (&gseq
, convert_stmt
);
24730 gimple_seq_add_stmt (&gseq
, return_stmt
);
24731 set_bb_seq (new_bb
, gseq
);
24733 basic_block bb1
= new_bb
;
24734 edge e12
= split_block (bb1
, if_else_stmt
);
24735 basic_block bb2
= e12
->dest
;
24736 e12
->flags
&= ~EDGE_FALLTHRU
;
24737 e12
->flags
|= EDGE_TRUE_VALUE
;
24739 edge e23
= split_block (bb2
, return_stmt
);
24740 gimple_set_bb (convert_stmt
, bb2
);
24741 gimple_set_bb (return_stmt
, bb2
);
24743 basic_block bb3
= e23
->dest
;
24744 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
24747 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
24753 /* This function generates the dispatch function for multi-versioned functions.
24754 DISPATCH_DECL is the function which will contain the dispatch logic.
24755 FNDECLS are the function choices for dispatch, and is a tree chain.
24756 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24757 code is generated. */
24760 dispatch_function_versions (tree dispatch_decl
,
24762 basic_block
*empty_bb
)
24766 vec
<tree
> *fndecls
;
24767 tree clones
[CLONE_MAX
];
24769 if (TARGET_DEBUG_TARGET
)
24770 fputs ("dispatch_function_versions, top\n", stderr
);
24772 gcc_assert (dispatch_decl
!= NULL
24773 && fndecls_p
!= NULL
24774 && empty_bb
!= NULL
);
24776 /* fndecls_p is actually a vector. */
24777 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
24779 /* At least one more version other than the default. */
24780 gcc_assert (fndecls
->length () >= 2);
24782 /* The first version in the vector is the default decl. */
24783 memset ((void *) clones
, '\0', sizeof (clones
));
24784 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
24786 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24787 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24788 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
24789 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24790 to insert the code here to do the call. */
24792 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
24794 int priority
= rs6000_clone_priority (ele
);
24795 if (!clones
[priority
])
24796 clones
[priority
] = ele
;
24799 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
24802 if (TARGET_DEBUG_TARGET
)
24803 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
24804 ix
, get_decl_name (clones
[ix
]));
24806 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
24813 /* Generate the dispatching code body to dispatch multi-versioned function
24814 DECL. The target hook is called to process the "target" attributes and
24815 provide the code to dispatch the right function at run-time. NODE points
24816 to the dispatcher decl whose body will be created. */
24819 rs6000_generate_version_dispatcher_body (void *node_p
)
24822 basic_block empty_bb
;
24823 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
24824 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
24826 if (ninfo
->dispatcher_resolver
)
24827 return ninfo
->dispatcher_resolver
;
24829 /* node is going to be an alias, so remove the finalized bit. */
24830 node
->definition
= false;
24832 /* The first version in the chain corresponds to the default version. */
24833 ninfo
->dispatcher_resolver
= resolver
24834 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
24836 if (TARGET_DEBUG_TARGET
)
24837 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
24838 get_decl_name (resolver
));
24840 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
24841 auto_vec
<tree
, 2> fn_ver_vec
;
24843 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
24845 vinfo
= vinfo
->next
)
24847 struct cgraph_node
*version
= vinfo
->this_node
;
24848 /* Check for virtual functions here again, as by this time it should
24849 have been determined if this function needs a vtable index or
24850 not. This happens for methods in derived classes that override
24851 virtual methods in base classes but are not explicitly marked as
24853 if (DECL_VINDEX (version
->decl
))
24854 sorry ("Virtual function multiversioning not supported");
24856 fn_ver_vec
.safe_push (version
->decl
);
24859 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
24860 cgraph_edge::rebuild_edges ();
24866 /* Hook to determine if one function can safely inline another. */
24869 rs6000_can_inline_p (tree caller
, tree callee
)
24872 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
24873 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
24875 /* If the callee has no option attributes, then it is ok to inline. */
24881 HOST_WIDE_INT caller_isa
;
24882 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24883 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
24884 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
24886 /* If the caller has option attributes, then use them.
24887 Otherwise, use the command line options. */
24889 caller_isa
= TREE_TARGET_OPTION (caller_tree
)->x_rs6000_isa_flags
;
24891 caller_isa
= rs6000_isa_flags
;
24893 /* The callee's options must be a subset of the caller's options, i.e.
24894 a vsx function may inline an altivec function, but a no-vsx function
24895 must not inline a vsx function. However, for those options that the
24896 callee has explicitly enabled or disabled, then we must enforce that
24897 the callee's and caller's options match exactly; see PR70010. */
24898 if (((caller_isa
& callee_isa
) == callee_isa
)
24899 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
24903 if (TARGET_DEBUG_TARGET
)
24904 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24905 get_decl_name (caller
), get_decl_name (callee
),
24906 (ret
? "can" : "cannot"));
24911 /* Allocate a stack temp and fixup the address so it meets the particular
24912 memory requirements (either offetable or REG+REG addressing). */
24915 rs6000_allocate_stack_temp (machine_mode mode
,
24916 bool offsettable_p
,
24919 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
24920 rtx addr
= XEXP (stack
, 0);
24921 int strict_p
= reload_completed
;
24923 if (!legitimate_indirect_address_p (addr
, strict_p
))
24926 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
24927 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
24929 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
24930 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
24936 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24937 convert to such a form to deal with memory reference instructions
24938 like STFIWX and LDBRX that only take reg+reg addressing. */
24941 rs6000_force_indexed_or_indirect_mem (rtx x
)
24943 machine_mode mode
= GET_MODE (x
);
24945 gcc_assert (MEM_P (x
));
24946 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x
, mode
))
24948 rtx addr
= XEXP (x
, 0);
24949 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
24951 rtx reg
= XEXP (addr
, 0);
24952 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
24953 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
24954 gcc_assert (REG_P (reg
));
24955 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
24958 else if (GET_CODE (addr
) == PRE_MODIFY
)
24960 rtx reg
= XEXP (addr
, 0);
24961 rtx expr
= XEXP (addr
, 1);
24962 gcc_assert (REG_P (reg
));
24963 gcc_assert (GET_CODE (expr
) == PLUS
);
24964 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
24968 if (GET_CODE (addr
) == PLUS
)
24970 rtx op0
= XEXP (addr
, 0);
24971 rtx op1
= XEXP (addr
, 1);
24972 op0
= force_reg (Pmode
, op0
);
24973 op1
= force_reg (Pmode
, op1
);
24974 x
= replace_equiv_address (x
, gen_rtx_PLUS (Pmode
, op0
, op1
));
24977 x
= replace_equiv_address (x
, force_reg (Pmode
, addr
));
24983 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24985 On the RS/6000, all integer constants are acceptable, most won't be valid
24986 for particular insns, though. Only easy FP constants are acceptable. */
24989 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
24991 if (TARGET_ELF
&& tls_referenced_p (x
))
24994 if (CONST_DOUBLE_P (x
))
24995 return easy_fp_constant (x
, mode
);
24997 if (GET_CODE (x
) == CONST_VECTOR
)
24998 return easy_vector_constant (x
, mode
);
25004 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25007 chain_already_loaded (rtx_insn
*last
)
25009 for (; last
!= NULL
; last
= PREV_INSN (last
))
25011 if (NONJUMP_INSN_P (last
))
25013 rtx patt
= PATTERN (last
);
25015 if (GET_CODE (patt
) == SET
)
25017 rtx lhs
= XEXP (patt
, 0);
25019 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
25027 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25030 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25032 rtx func
= func_desc
;
25033 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25034 rtx toc_load
= NULL_RTX
;
25035 rtx toc_restore
= NULL_RTX
;
25037 rtx abi_reg
= NULL_RTX
;
25041 bool is_pltseq_longcall
;
25044 tlsarg
= global_tlsarg
;
25046 /* Handle longcall attributes. */
25047 is_pltseq_longcall
= false;
25048 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25049 && GET_CODE (func_desc
) == SYMBOL_REF
)
25051 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25053 is_pltseq_longcall
= true;
25056 /* Handle indirect calls. */
25057 if (!SYMBOL_REF_P (func
)
25058 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25060 if (!rs6000_pcrel_p ())
25062 /* Save the TOC into its reserved slot before the call,
25063 and prepare to restore it after the call. */
25064 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
25065 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
25066 gen_rtvec (1, stack_toc_offset
),
25068 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
25070 /* Can we optimize saving the TOC in the prologue or
25071 do we need to do it at every call? */
25072 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
25073 cfun
->machine
->save_toc_in_prologue
= true;
25076 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25077 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
25078 gen_rtx_PLUS (Pmode
, stack_ptr
,
25079 stack_toc_offset
));
25080 MEM_VOLATILE_P (stack_toc_mem
) = 1;
25081 if (is_pltseq_longcall
)
25083 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
25084 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25085 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
25088 emit_move_insn (stack_toc_mem
, toc_reg
);
25092 if (DEFAULT_ABI
== ABI_ELFv2
)
25094 /* A function pointer in the ELFv2 ABI is just a plain address, but
25095 the ABI requires it to be loaded into r12 before the call. */
25096 func_addr
= gen_rtx_REG (Pmode
, 12);
25097 emit_move_insn (func_addr
, func
);
25098 abi_reg
= func_addr
;
25099 /* Indirect calls via CTR are strongly preferred over indirect
25100 calls via LR, so move the address there. Needed to mark
25101 this insn for linker plt sequence editing too. */
25102 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25103 if (is_pltseq_longcall
)
25105 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
25106 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25107 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25108 v
= gen_rtvec (2, func_addr
, func_desc
);
25109 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25112 emit_move_insn (func_addr
, abi_reg
);
25116 /* A function pointer under AIX is a pointer to a data area whose
25117 first word contains the actual address of the function, whose
25118 second word contains a pointer to its TOC, and whose third word
25119 contains a value to place in the static chain register (r11).
25120 Note that if we load the static chain, our "trampoline" need
25121 not have any executable code. */
25123 /* Load up address of the actual function. */
25124 func
= force_reg (Pmode
, func
);
25125 func_addr
= gen_reg_rtx (Pmode
);
25126 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
25128 /* Indirect calls via CTR are strongly preferred over indirect
25129 calls via LR, so move the address there. */
25130 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25131 emit_move_insn (ctr_reg
, func_addr
);
25132 func_addr
= ctr_reg
;
25134 /* Prepare to load the TOC of the called function. Note that the
25135 TOC load must happen immediately before the actual call so
25136 that unwinding the TOC registers works correctly. See the
25137 comment in frob_update_context. */
25138 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
25139 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
25140 gen_rtx_PLUS (Pmode
, func
,
25142 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
25144 /* If we have a static chain, load it up. But, if the call was
25145 originally direct, the 3rd word has not been written since no
25146 trampoline has been built, so we ought not to load it, lest we
25147 override a static chain value. */
25148 if (!(GET_CODE (func_desc
) == SYMBOL_REF
25149 && SYMBOL_REF_FUNCTION_P (func_desc
))
25150 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25151 && !chain_already_loaded (get_current_sequence ()->next
->last
))
25153 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
25154 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
25155 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
25156 gen_rtx_PLUS (Pmode
, func
,
25158 emit_move_insn (sc_reg
, func_sc_mem
);
25165 /* No TOC register needed for calls from PC-relative callers. */
25166 if (!rs6000_pcrel_p ())
25167 /* Direct calls use the TOC: for local calls, the callee will
25168 assume the TOC register is set; for non-local calls, the
25169 PLT stub needs the TOC register. */
25174 /* Create the call. */
25175 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25176 if (value
!= NULL_RTX
)
25177 call
[0] = gen_rtx_SET (value
, call
[0]);
25178 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25182 call
[n_call
++] = toc_load
;
25184 call
[n_call
++] = toc_restore
;
25186 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25188 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
25189 insn
= emit_call_insn (insn
);
25191 /* Mention all registers defined by the ABI to hold information
25192 as uses in CALL_INSN_FUNCTION_USAGE. */
25194 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25197 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25200 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25204 rtx r12
= NULL_RTX
;
25205 rtx func_addr
= func_desc
;
25207 gcc_assert (INTVAL (cookie
) == 0);
25210 tlsarg
= global_tlsarg
;
25212 /* For ELFv2, r12 and CTR need to hold the function address
25213 for an indirect call. */
25214 if (GET_CODE (func_desc
) != SYMBOL_REF
&& DEFAULT_ABI
== ABI_ELFv2
)
25216 r12
= gen_rtx_REG (Pmode
, 12);
25217 emit_move_insn (r12
, func_desc
);
25218 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25219 emit_move_insn (func_addr
, r12
);
25222 /* Create the call. */
25223 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25224 if (value
!= NULL_RTX
)
25225 call
[0] = gen_rtx_SET (value
, call
[0]);
25227 call
[1] = simple_return_rtx
;
25229 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
25230 insn
= emit_call_insn (insn
);
25232 /* Note use of the TOC register. */
25233 if (!rs6000_pcrel_p ())
25234 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
25235 gen_rtx_REG (Pmode
, TOC_REGNUM
));
25237 /* Note use of r12. */
25239 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), r12
);
25242 /* Expand code to perform a call under the SYSV4 ABI. */
25245 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25247 rtx func
= func_desc
;
25251 rtx abi_reg
= NULL_RTX
;
25255 tlsarg
= global_tlsarg
;
25257 /* Handle longcall attributes. */
25258 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25259 && GET_CODE (func_desc
) == SYMBOL_REF
)
25261 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25262 /* If the longcall was implemented as an inline PLT call using
25263 PLT unspecs then func will be REG:r11. If not, func will be
25264 a pseudo reg. The inline PLT call sequence supports lazy
25265 linking (and longcalls to functions in dlopen'd libraries).
25266 The other style of longcalls don't. The lazy linking entry
25267 to the dynamic symbol resolver requires r11 be the function
25268 address (as it is for linker generated PLT stubs). Ensure
25269 r11 stays valid to the bctrl by marking r11 used by the call. */
25274 /* Handle indirect calls. */
25275 if (GET_CODE (func
) != SYMBOL_REF
)
25277 func
= force_reg (Pmode
, func
);
25279 /* Indirect calls via CTR are strongly preferred over indirect
25280 calls via LR, so move the address there. That can't be left
25281 to reload because we want to mark every instruction in an
25282 inline PLT call sequence with a reloc, enabling the linker to
25283 edit the sequence back to a direct call when that makes sense. */
25284 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25287 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25288 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25289 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25290 v
= gen_rtvec (2, func_addr
, func_desc
);
25291 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25294 emit_move_insn (func_addr
, func
);
25299 /* Create the call. */
25300 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25301 if (value
!= NULL_RTX
)
25302 call
[0] = gen_rtx_SET (value
, call
[0]);
25304 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25306 if (TARGET_SECURE_PLT
25308 && GET_CODE (func_addr
) == SYMBOL_REF
25309 && !SYMBOL_REF_LOCAL_P (func_addr
))
25310 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
25312 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25314 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
25315 insn
= emit_call_insn (insn
);
25317 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25320 /* Expand code to perform a sibling call under the SysV4 ABI. */
25323 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25325 rtx func
= func_desc
;
25329 rtx abi_reg
= NULL_RTX
;
25332 tlsarg
= global_tlsarg
;
25334 /* Handle longcall attributes. */
25335 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25336 && GET_CODE (func_desc
) == SYMBOL_REF
)
25338 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25339 /* If the longcall was implemented as an inline PLT call using
25340 PLT unspecs then func will be REG:r11. If not, func will be
25341 a pseudo reg. The inline PLT call sequence supports lazy
25342 linking (and longcalls to functions in dlopen'd libraries).
25343 The other style of longcalls don't. The lazy linking entry
25344 to the dynamic symbol resolver requires r11 be the function
25345 address (as it is for linker generated PLT stubs). Ensure
25346 r11 stays valid to the bctr by marking r11 used by the call. */
25351 /* Handle indirect calls. */
25352 if (GET_CODE (func
) != SYMBOL_REF
)
25354 func
= force_reg (Pmode
, func
);
25356 /* Indirect sibcalls must go via CTR. That can't be left to
25357 reload because we want to mark every instruction in an inline
25358 PLT call sequence with a reloc, enabling the linker to edit
25359 the sequence back to a direct call when that makes sense. */
25360 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25363 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25364 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25365 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25366 v
= gen_rtvec (2, func_addr
, func_desc
);
25367 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25370 emit_move_insn (func_addr
, func
);
25375 /* Create the call. */
25376 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25377 if (value
!= NULL_RTX
)
25378 call
[0] = gen_rtx_SET (value
, call
[0]);
25380 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25381 call
[2] = simple_return_rtx
;
25383 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
25384 insn
= emit_call_insn (insn
);
25386 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25391 /* Expand code to perform a call under the Darwin ABI.
25392 Modulo handling of mlongcall, this is much the same as sysv.
25393 if/when the longcall optimisation is removed, we could drop this
25394 code and use the sysv case (taking care to avoid the tls stuff).
25396 We can use this for sibcalls too, if needed. */
25399 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
25400 rtx cookie
, bool sibcall
)
25402 rtx func
= func_desc
;
25406 int cookie_val
= INTVAL (cookie
);
25407 bool make_island
= false;
25409 /* Handle longcall attributes, there are two cases for Darwin:
25410 1) Newer linkers are capable of synthesising any branch islands needed.
25411 2) We need a helper branch island synthesised by the compiler.
25412 The second case has mostly been retired and we don't use it for m64.
25413 In fact, it's is an optimisation, we could just indirect as sysv does..
25414 ... however, backwards compatibility for now.
25415 If we're going to use this, then we need to keep the CALL_LONG bit set,
25416 so that we can pick up the special insn form later. */
25417 if ((cookie_val
& CALL_LONG
) != 0
25418 && GET_CODE (func_desc
) == SYMBOL_REF
)
25420 /* FIXME: the longcall opt should not hang off this flag, it is most
25421 likely incorrect for kernel-mode code-generation. */
25422 if (darwin_symbol_stubs
&& TARGET_32BIT
)
25423 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
25426 /* The linker is capable of doing this, but the user explicitly
25427 asked for -mlongcall, so we'll do the 'normal' version. */
25428 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
25429 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
25433 /* Handle indirect calls. */
25434 if (GET_CODE (func
) != SYMBOL_REF
)
25436 func
= force_reg (Pmode
, func
);
25438 /* Indirect calls via CTR are strongly preferred over indirect
25439 calls via LR, and are required for indirect sibcalls, so move
25440 the address there. */
25441 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25442 emit_move_insn (func_addr
, func
);
25447 /* Create the call. */
25448 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25449 if (value
!= NULL_RTX
)
25450 call
[0] = gen_rtx_SET (value
, call
[0]);
25452 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
25455 call
[2] = simple_return_rtx
;
25457 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25459 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
25460 insn
= emit_call_insn (insn
);
25461 /* Now we have the debug info in the insn, we can set up the branch island
25462 if we're using one. */
25465 tree funname
= get_identifier (XSTR (func_desc
, 0));
25467 if (no_previous_def (funname
))
25469 rtx label_rtx
= gen_label_rtx ();
25470 char *label_buf
, temp_buf
[256];
25471 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
25472 CODE_LABEL_NUMBER (label_rtx
));
25473 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
25474 tree labelname
= get_identifier (label_buf
);
25475 add_compiler_branch_island (labelname
, funname
,
25476 insn_line ((const rtx_insn
*)insn
));
25483 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
25484 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
25487 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
25495 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
25496 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
25499 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
25505 /* Return whether we should generate PC-relative code for FNDECL. */
25507 rs6000_fndecl_pcrel_p (const_tree fndecl
)
25509 if (DEFAULT_ABI
!= ABI_ELFv2
)
25512 struct cl_target_option
*opts
= target_opts_for_fn (fndecl
);
25514 return ((opts
->x_rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
25515 && TARGET_CMODEL
== CMODEL_MEDIUM
);
25518 /* Return whether we should generate PC-relative code for *FN. */
25520 rs6000_function_pcrel_p (struct function
*fn
)
25522 if (DEFAULT_ABI
!= ABI_ELFv2
)
25525 /* Optimize usual case. */
25527 return ((rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
25528 && TARGET_CMODEL
== CMODEL_MEDIUM
);
25530 return rs6000_fndecl_pcrel_p (fn
->decl
);
25533 /* Return whether we should generate PC-relative code for the current
25538 return (DEFAULT_ABI
== ABI_ELFv2
25539 && (rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
25540 && TARGET_CMODEL
== CMODEL_MEDIUM
);
25544 /* Given an address (ADDR), a mode (MODE), and what the format of the
25545 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25546 for the address. */
25549 address_to_insn_form (rtx addr
,
25551 enum non_prefixed_form non_prefixed_format
)
25553 /* Single register is easy. */
25554 if (REG_P (addr
) || SUBREG_P (addr
))
25555 return INSN_FORM_BASE_REG
;
25557 /* If the non prefixed instruction format doesn't support offset addressing,
25558 make sure only indexed addressing is allowed.
25560 We special case SDmode so that the register allocator does not try to move
25561 SDmode through GPR registers, but instead uses the 32-bit integer load and
25562 store instructions for the floating point registers. */
25563 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
25565 if (GET_CODE (addr
) != PLUS
)
25566 return INSN_FORM_BAD
;
25568 rtx op0
= XEXP (addr
, 0);
25569 rtx op1
= XEXP (addr
, 1);
25570 if (!REG_P (op0
) && !SUBREG_P (op0
))
25571 return INSN_FORM_BAD
;
25573 if (!REG_P (op1
) && !SUBREG_P (op1
))
25574 return INSN_FORM_BAD
;
25576 return INSN_FORM_X
;
25579 /* Deal with update forms. */
25580 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
25581 return INSN_FORM_UPDATE
;
25583 /* Handle PC-relative symbols and labels. Check for both local and
25584 external symbols. Assume labels are always local. TLS symbols
25585 are not PC-relative for rs6000. */
25588 if (LABEL_REF_P (addr
))
25589 return INSN_FORM_PCREL_LOCAL
;
25591 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
25593 if (!SYMBOL_REF_LOCAL_P (addr
))
25594 return INSN_FORM_PCREL_EXTERNAL
;
25596 return INSN_FORM_PCREL_LOCAL
;
25600 if (GET_CODE (addr
) == CONST
)
25601 addr
= XEXP (addr
, 0);
25603 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25604 if (GET_CODE (addr
) == LO_SUM
)
25605 return INSN_FORM_LO_SUM
;
25607 /* Everything below must be an offset address of some form. */
25608 if (GET_CODE (addr
) != PLUS
)
25609 return INSN_FORM_BAD
;
25611 rtx op0
= XEXP (addr
, 0);
25612 rtx op1
= XEXP (addr
, 1);
25614 /* Check for indexed addresses. */
25615 if (REG_P (op1
) || SUBREG_P (op1
))
25617 if (REG_P (op0
) || SUBREG_P (op0
))
25618 return INSN_FORM_X
;
25620 return INSN_FORM_BAD
;
25623 if (!CONST_INT_P (op1
))
25624 return INSN_FORM_BAD
;
25626 HOST_WIDE_INT offset
= INTVAL (op1
);
25627 if (!SIGNED_INTEGER_34BIT_P (offset
))
25628 return INSN_FORM_BAD
;
25630 /* Check for local and external PC-relative addresses. Labels are always
25631 local. TLS symbols are not PC-relative for rs6000. */
25634 if (LABEL_REF_P (op0
))
25635 return INSN_FORM_PCREL_LOCAL
;
25637 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
25639 if (!SYMBOL_REF_LOCAL_P (op0
))
25640 return INSN_FORM_PCREL_EXTERNAL
;
25642 return INSN_FORM_PCREL_LOCAL
;
25646 /* If it isn't PC-relative, the address must use a base register. */
25647 if (!REG_P (op0
) && !SUBREG_P (op0
))
25648 return INSN_FORM_BAD
;
25650 /* Large offsets must be prefixed. */
25651 if (!SIGNED_INTEGER_16BIT_P (offset
))
25653 if (TARGET_PREFIXED
)
25654 return INSN_FORM_PREFIXED_NUMERIC
;
25656 return INSN_FORM_BAD
;
25659 /* We have a 16-bit offset, see what default instruction format to use. */
25660 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
25662 unsigned size
= GET_MODE_SIZE (mode
);
25664 /* On 64-bit systems, assume 64-bit integers need to use DS form
25665 addresses (for LD/STD). VSX vectors need to use DQ form addresses
25666 (for LXV and STXV). TImode is problematical in that its normal usage
25667 is expected to be GPRs where it wants a DS instruction format, but if
25668 it goes into the vector registers, it wants a DQ instruction
25670 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
25671 non_prefixed_format
= NON_PREFIXED_DS
;
25673 else if (TARGET_VSX
&& size
>= 16
25674 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
25675 non_prefixed_format
= NON_PREFIXED_DQ
;
25678 non_prefixed_format
= NON_PREFIXED_D
;
25681 /* Classify the D/DS/DQ-form addresses. */
25682 switch (non_prefixed_format
)
25684 /* Instruction format D, all 16 bits are valid. */
25685 case NON_PREFIXED_D
:
25686 return INSN_FORM_D
;
25688 /* Instruction format DS, bottom 2 bits must be 0. */
25689 case NON_PREFIXED_DS
:
25690 if ((offset
& 3) == 0)
25691 return INSN_FORM_DS
;
25693 else if (TARGET_PREFIXED
)
25694 return INSN_FORM_PREFIXED_NUMERIC
;
25697 return INSN_FORM_BAD
;
25699 /* Instruction format DQ, bottom 4 bits must be 0. */
25700 case NON_PREFIXED_DQ
:
25701 if ((offset
& 15) == 0)
25702 return INSN_FORM_DQ
;
25704 else if (TARGET_PREFIXED
)
25705 return INSN_FORM_PREFIXED_NUMERIC
;
25708 return INSN_FORM_BAD
;
25714 return INSN_FORM_BAD
;
25717 /* Helper function to see if we're potentially looking at lfs/stfs.
25718 - PARALLEL containing a SET and a CLOBBER
25720 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
25721 - CLOBBER is a V4SF
25723 - SET is from UNSPEC_SF_FROM_SI to REG:SF
25728 is_lfs_stfs_insn (rtx_insn
*insn
)
25730 rtx pattern
= PATTERN (insn
);
25731 if (GET_CODE (pattern
) != PARALLEL
)
25734 /* This should be a parallel with exactly one set and one clobber. */
25735 if (XVECLEN (pattern
, 0) != 2)
25738 rtx set
= XVECEXP (pattern
, 0, 0);
25739 if (GET_CODE (set
) != SET
)
25742 rtx clobber
= XVECEXP (pattern
, 0, 1);
25743 if (GET_CODE (clobber
) != CLOBBER
)
25746 /* All we care is that the destination of the SET is a mem:SI,
25747 the source should be an UNSPEC_SI_FROM_SF, and the clobber
25748 should be a scratch:V4SF. */
25750 rtx dest
= SET_DEST (set
);
25751 rtx src
= SET_SRC (set
);
25752 rtx scratch
= SET_DEST (clobber
);
25754 if (GET_CODE (src
) != UNSPEC
)
25758 if (XINT (src
, 1) == UNSPEC_SI_FROM_SF
25759 && GET_CODE (dest
) == MEM
&& GET_MODE (dest
) == SImode
25760 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == V4SFmode
)
25764 if (XINT (src
, 1) == UNSPEC_SF_FROM_SI
25765 && GET_CODE (dest
) == REG
&& GET_MODE (dest
) == SFmode
25766 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == DImode
)
25772 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
25773 instruction format (D/DS/DQ) used for offset memory. */
25775 static enum non_prefixed_form
25776 reg_to_non_prefixed (rtx reg
, machine_mode mode
)
25778 /* If it isn't a register, use the defaults. */
25779 if (!REG_P (reg
) && !SUBREG_P (reg
))
25780 return NON_PREFIXED_DEFAULT
;
25782 unsigned int r
= reg_or_subregno (reg
);
25784 /* If we have a pseudo, use the default instruction format. */
25785 if (!HARD_REGISTER_NUM_P (r
))
25786 return NON_PREFIXED_DEFAULT
;
25788 unsigned size
= GET_MODE_SIZE (mode
);
25790 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
25791 128-bit floating point, and 128-bit integers. Before power9, only indexed
25792 addressing was available for vectors. */
25793 if (FP_REGNO_P (r
))
25795 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
25796 return NON_PREFIXED_D
;
25799 return NON_PREFIXED_X
;
25801 else if (TARGET_VSX
&& size
>= 16
25802 && (VECTOR_MODE_P (mode
)
25803 || VECTOR_ALIGNMENT_P (mode
)
25804 || mode
== TImode
|| mode
== CTImode
))
25805 return (TARGET_P9_VECTOR
) ? NON_PREFIXED_DQ
: NON_PREFIXED_X
;
25808 return NON_PREFIXED_DEFAULT
;
25811 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
25812 128-bit floating point, and 128-bit integers. Before power9, only indexed
25813 addressing was available. */
25814 else if (ALTIVEC_REGNO_P (r
))
25816 if (!TARGET_P9_VECTOR
)
25817 return NON_PREFIXED_X
;
25819 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
25820 return NON_PREFIXED_DS
;
25823 return NON_PREFIXED_X
;
25825 else if (TARGET_VSX
&& size
>= 16
25826 && (VECTOR_MODE_P (mode
)
25827 || VECTOR_ALIGNMENT_P (mode
)
25828 || mode
== TImode
|| mode
== CTImode
))
25829 return NON_PREFIXED_DQ
;
25832 return NON_PREFIXED_DEFAULT
;
25835 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
25836 otherwise. Assume that any other register, such as LR, CRs, etc. will go
25837 through the GPR registers for memory operations. */
25838 else if (TARGET_POWERPC64
&& size
>= 8)
25839 return NON_PREFIXED_DS
;
25841 return NON_PREFIXED_D
;
25845 /* Whether a load instruction is a prefixed instruction. This is called from
25846 the prefixed attribute processing. */
25849 prefixed_load_p (rtx_insn
*insn
)
25851 /* Validate the insn to make sure it is a normal load insn. */
25852 extract_insn_cached (insn
);
25853 if (recog_data
.n_operands
< 2)
25856 rtx reg
= recog_data
.operand
[0];
25857 rtx mem
= recog_data
.operand
[1];
25859 if (!REG_P (reg
) && !SUBREG_P (reg
))
25865 /* Prefixed load instructions do not support update or indexed forms. */
25866 if (get_attr_indexed (insn
) == INDEXED_YES
25867 || get_attr_update (insn
) == UPDATE_YES
)
25870 /* LWA uses the DS format instead of the D format that LWZ uses. */
25871 enum non_prefixed_form non_prefixed
;
25872 machine_mode reg_mode
= GET_MODE (reg
);
25873 machine_mode mem_mode
= GET_MODE (mem
);
25875 if (mem_mode
== SImode
&& reg_mode
== DImode
25876 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
25877 non_prefixed
= NON_PREFIXED_DS
;
25880 non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
25882 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
25883 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, NON_PREFIXED_DEFAULT
);
25885 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, non_prefixed
);
25888 /* Whether a store instruction is a prefixed instruction. This is called from
25889 the prefixed attribute processing. */
25892 prefixed_store_p (rtx_insn
*insn
)
25894 /* Validate the insn to make sure it is a normal store insn. */
25895 extract_insn_cached (insn
);
25896 if (recog_data
.n_operands
< 2)
25899 rtx mem
= recog_data
.operand
[0];
25900 rtx reg
= recog_data
.operand
[1];
25902 if (!REG_P (reg
) && !SUBREG_P (reg
))
25908 /* Prefixed store instructions do not support update or indexed forms. */
25909 if (get_attr_indexed (insn
) == INDEXED_YES
25910 || get_attr_update (insn
) == UPDATE_YES
)
25913 machine_mode mem_mode
= GET_MODE (mem
);
25914 rtx addr
= XEXP (mem
, 0);
25915 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
25917 /* Need to make sure we aren't looking at a stfs which doesn't look
25918 like the other things reg_to_non_prefixed/address_is_prefixed
25920 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
25921 return address_is_prefixed (addr
, mem_mode
, NON_PREFIXED_DEFAULT
);
25923 return address_is_prefixed (addr
, mem_mode
, non_prefixed
);
25926 /* Whether a load immediate or add instruction is a prefixed instruction. This
25927 is called from the prefixed attribute processing. */
25930 prefixed_paddi_p (rtx_insn
*insn
)
25932 rtx set
= single_set (insn
);
25936 rtx dest
= SET_DEST (set
);
25937 rtx src
= SET_SRC (set
);
25939 if (!REG_P (dest
) && !SUBREG_P (dest
))
25942 /* Is this a load immediate that can't be done with a simple ADDI or
25944 if (CONST_INT_P (src
))
25945 return (satisfies_constraint_eI (src
)
25946 && !satisfies_constraint_I (src
)
25947 && !satisfies_constraint_L (src
));
25949 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25951 if (GET_CODE (src
) == PLUS
)
25953 rtx op1
= XEXP (src
, 1);
25955 return (CONST_INT_P (op1
)
25956 && satisfies_constraint_eI (op1
)
25957 && !satisfies_constraint_I (op1
)
25958 && !satisfies_constraint_L (op1
));
25961 /* If not, is it a load of a PC-relative address? */
25962 if (!TARGET_PCREL
|| GET_MODE (dest
) != Pmode
)
25965 if (!SYMBOL_REF_P (src
) && !LABEL_REF_P (src
) && GET_CODE (src
) != CONST
)
25968 enum insn_form iform
= address_to_insn_form (src
, Pmode
,
25969 NON_PREFIXED_DEFAULT
);
25971 return (iform
== INSN_FORM_PCREL_EXTERNAL
|| iform
== INSN_FORM_PCREL_LOCAL
);
25974 /* Whether the next instruction needs a 'p' prefix issued before the
25975 instruction is printed out. */
25976 static bool next_insn_prefixed_p
;
25978 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25979 outputting the assembler code. On the PowerPC, we remember if the current
25980 insn is a prefixed insn where we need to emit a 'p' before the insn.
25982 In addition, if the insn is part of a PC-relative reference to an external
25983 label optimization, this is recorded also. */
25985 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
[], int)
25987 next_insn_prefixed_p
= (get_attr_prefixed (insn
) != PREFIXED_NO
);
25991 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25992 We use it to emit a 'p' for prefixed insns that is set in
25993 FINAL_PRESCAN_INSN. */
25995 rs6000_asm_output_opcode (FILE *stream
)
25997 if (next_insn_prefixed_p
)
25998 fprintf (stream
, "p");
26003 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26004 should be adjusted to reflect any required changes. This macro is used when
26005 there is some systematic length adjustment required that would be difficult
26006 to express in the length attribute.
26008 In the PowerPC, we use this to adjust the length of an instruction if one or
26009 more prefixed instructions are generated, using the attribute
26010 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26011 hardware requires that a prefied instruciton does not cross a 64-byte
26012 boundary. This means the compiler has to assume the length of the first
26013 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26014 already set for the non-prefixed instruction, we just need to udpate for the
26018 rs6000_adjust_insn_length (rtx_insn
*insn
, int length
)
26020 if (TARGET_PREFIXED
&& NONJUMP_INSN_P (insn
))
26022 rtx pattern
= PATTERN (insn
);
26023 if (GET_CODE (pattern
) != USE
&& GET_CODE (pattern
) != CLOBBER
26024 && get_attr_prefixed (insn
) == PREFIXED_YES
)
26026 int num_prefixed
= get_attr_max_prefixed_insns (insn
);
26027 length
+= 4 * (num_prefixed
+ 1);
26035 #ifdef HAVE_GAS_HIDDEN
26036 # define USE_HIDDEN_LINKONCE 1
26038 # define USE_HIDDEN_LINKONCE 0
26041 /* Fills in the label name that should be used for a 476 link stack thunk. */
26044 get_ppc476_thunk_name (char name
[32])
26046 gcc_assert (TARGET_LINK_STACK
);
26048 if (USE_HIDDEN_LINKONCE
)
26049 sprintf (name
, "__ppc476.get_thunk");
26051 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
26054 /* This function emits the simple thunk routine that is used to preserve
26055 the link stack on the 476 cpu. */
26057 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
26059 rs6000_code_end (void)
26064 if (!TARGET_LINK_STACK
)
26067 get_ppc476_thunk_name (name
);
26069 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
26070 build_function_type_list (void_type_node
, NULL_TREE
));
26071 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
26072 NULL_TREE
, void_type_node
);
26073 TREE_PUBLIC (decl
) = 1;
26074 TREE_STATIC (decl
) = 1;
26077 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
26079 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
26080 targetm
.asm_out
.unique_section (decl
, 0);
26081 switch_to_section (get_named_section (decl
, NULL
, 0));
26082 DECL_WEAK (decl
) = 1;
26083 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
26084 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
26085 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
26086 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
26091 switch_to_section (text_section
);
26092 ASM_OUTPUT_LABEL (asm_out_file
, name
);
26095 DECL_INITIAL (decl
) = make_node (BLOCK
);
26096 current_function_decl
= decl
;
26097 allocate_struct_function (decl
, false);
26098 init_function_start (decl
);
26099 first_function_block_is_cold
= false;
26100 /* Make sure unwind info is emitted for the thunk if needed. */
26101 final_start_function (emit_barrier (), asm_out_file
, 1);
26103 fputs ("\tblr\n", asm_out_file
);
26105 final_end_function ();
26106 init_insn_lengths ();
26107 free_after_compilation (cfun
);
26109 current_function_decl
= NULL
;
26112 /* Add r30 to hard reg set if the prologue sets it up and it is not
26113 pic_offset_table_rtx. */
26116 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
26118 if (!TARGET_SINGLE_PIC_BASE
26120 && TARGET_MINIMAL_TOC
26121 && !constant_pool_empty_p ())
26122 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
26123 if (cfun
->machine
->split_stack_argp_used
)
26124 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
26126 /* Make sure the hard reg set doesn't include r2, which was possibly added
26127 via PIC_OFFSET_TABLE_REGNUM. */
26129 remove_from_hard_reg_set (&set
->set
, Pmode
, TOC_REGNUM
);
26133 /* Helper function for rs6000_split_logical to emit a logical instruction after
26134 spliting the operation to single GPR registers.
26136 DEST is the destination register.
26137 OP1 and OP2 are the input source registers.
26138 CODE is the base operation (AND, IOR, XOR, NOT).
26139 MODE is the machine mode.
26140 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26141 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26142 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26145 rs6000_split_logical_inner (rtx dest
,
26148 enum rtx_code code
,
26150 bool complement_final_p
,
26151 bool complement_op1_p
,
26152 bool complement_op2_p
)
26156 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26157 if (op2
&& CONST_INT_P (op2
)
26158 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
26159 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26161 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
26162 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
26164 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26169 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
26173 else if (value
== mask
)
26175 if (!rtx_equal_p (dest
, op1
))
26176 emit_insn (gen_rtx_SET (dest
, op1
));
26181 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26182 into separate ORI/ORIS or XORI/XORIS instrucitons. */
26183 else if (code
== IOR
|| code
== XOR
)
26187 if (!rtx_equal_p (dest
, op1
))
26188 emit_insn (gen_rtx_SET (dest
, op1
));
26194 if (code
== AND
&& mode
== SImode
26195 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26197 emit_insn (gen_andsi3 (dest
, op1
, op2
));
26201 if (complement_op1_p
)
26202 op1
= gen_rtx_NOT (mode
, op1
);
26204 if (complement_op2_p
)
26205 op2
= gen_rtx_NOT (mode
, op2
);
26207 /* For canonical RTL, if only one arm is inverted it is the first. */
26208 if (!complement_op1_p
&& complement_op2_p
)
26209 std::swap (op1
, op2
);
26211 bool_rtx
= ((code
== NOT
)
26212 ? gen_rtx_NOT (mode
, op1
)
26213 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
26215 if (complement_final_p
)
26216 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
26218 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
26221 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26222 operations are split immediately during RTL generation to allow for more
26223 optimizations of the AND/IOR/XOR.
26225 OPERANDS is an array containing the destination and two input operands.
26226 CODE is the base operation (AND, IOR, XOR, NOT).
26227 MODE is the machine mode.
26228 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26229 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26230 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26231 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26232 formation of the AND instructions. */
26235 rs6000_split_logical_di (rtx operands
[3],
26236 enum rtx_code code
,
26237 bool complement_final_p
,
26238 bool complement_op1_p
,
26239 bool complement_op2_p
)
26241 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
26242 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
26243 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
26244 enum hi_lo
{ hi
= 0, lo
= 1 };
26245 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
26248 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
26249 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
26250 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
26251 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
26254 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
26257 if (!CONST_INT_P (operands
[2]))
26259 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
26260 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
26264 HOST_WIDE_INT value
= INTVAL (operands
[2]);
26265 HOST_WIDE_INT value_hi_lo
[2];
26267 gcc_assert (!complement_final_p
);
26268 gcc_assert (!complement_op1_p
);
26269 gcc_assert (!complement_op2_p
);
26271 value_hi_lo
[hi
] = value
>> 32;
26272 value_hi_lo
[lo
] = value
& lower_32bits
;
26274 for (i
= 0; i
< 2; i
++)
26276 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
26278 if (sub_value
& sign_bit
)
26279 sub_value
|= upper_32bits
;
26281 op2_hi_lo
[i
] = GEN_INT (sub_value
);
26283 /* If this is an AND instruction, check to see if we need to load
26284 the value in a register. */
26285 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
26286 && !and_operand (op2_hi_lo
[i
], SImode
))
26287 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
26292 for (i
= 0; i
< 2; i
++)
26294 /* Split large IOR/XOR operations. */
26295 if ((code
== IOR
|| code
== XOR
)
26296 && CONST_INT_P (op2_hi_lo
[i
])
26297 && !complement_final_p
26298 && !complement_op1_p
26299 && !complement_op2_p
26300 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
26302 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
26303 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
26304 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
26305 rtx tmp
= gen_reg_rtx (SImode
);
26307 /* Make sure the constant is sign extended. */
26308 if ((hi_16bits
& sign_bit
) != 0)
26309 hi_16bits
|= upper_32bits
;
26311 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
26312 code
, SImode
, false, false, false);
26314 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
26315 code
, SImode
, false, false, false);
26318 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
26319 code
, SImode
, complement_final_p
,
26320 complement_op1_p
, complement_op2_p
);
26326 /* Split the insns that make up boolean operations operating on multiple GPR
26327 registers. The boolean MD patterns ensure that the inputs either are
26328 exactly the same as the output registers, or there is no overlap.
26330 OPERANDS is an array containing the destination and two input operands.
26331 CODE is the base operation (AND, IOR, XOR, NOT).
26332 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26333 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26334 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26337 rs6000_split_logical (rtx operands
[3],
26338 enum rtx_code code
,
26339 bool complement_final_p
,
26340 bool complement_op1_p
,
26341 bool complement_op2_p
)
26343 machine_mode mode
= GET_MODE (operands
[0]);
26344 machine_mode sub_mode
;
26346 int sub_size
, regno0
, regno1
, nregs
, i
;
26348 /* If this is DImode, use the specialized version that can run before
26349 register allocation. */
26350 if (mode
== DImode
&& !TARGET_POWERPC64
)
26352 rs6000_split_logical_di (operands
, code
, complement_final_p
,
26353 complement_op1_p
, complement_op2_p
);
26359 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
26360 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
26361 sub_size
= GET_MODE_SIZE (sub_mode
);
26362 regno0
= REGNO (op0
);
26363 regno1
= REGNO (op1
);
26365 gcc_assert (reload_completed
);
26366 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26367 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26369 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
26370 gcc_assert (nregs
> 1);
26372 if (op2
&& REG_P (op2
))
26373 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26375 for (i
= 0; i
< nregs
; i
++)
26377 int offset
= i
* sub_size
;
26378 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
26379 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
26380 rtx sub_op2
= ((code
== NOT
)
26382 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
26384 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
26385 complement_final_p
, complement_op1_p
,
26393 /* Return true if the peephole2 can combine a load involving a combination of
26394 an addis instruction and a load with an offset that can be fused together on
26398 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
26399 rtx addis_value
, /* addis value. */
26400 rtx target
, /* target register that is loaded. */
26401 rtx mem
) /* bottom part of the memory addr. */
26406 /* Validate arguments. */
26407 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
26410 if (!base_reg_operand (target
, GET_MODE (target
)))
26413 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
26416 /* Allow sign/zero extension. */
26417 if (GET_CODE (mem
) == ZERO_EXTEND
26418 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
26419 mem
= XEXP (mem
, 0);
26424 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
26427 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
26428 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
26431 /* Validate that the register used to load the high value is either the
26432 register being loaded, or we can safely replace its use.
26434 This function is only called from the peephole2 pass and we assume that
26435 there are 2 instructions in the peephole (addis and load), so we want to
26436 check if the target register was not used in the memory address and the
26437 register to hold the addis result is dead after the peephole. */
26438 if (REGNO (addis_reg
) != REGNO (target
))
26440 if (reg_mentioned_p (target
, mem
))
26443 if (!peep2_reg_dead_p (2, addis_reg
))
26446 /* If the target register being loaded is the stack pointer, we must
26447 avoid loading any other value into it, even temporarily. */
26448 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
26452 base_reg
= XEXP (addr
, 0);
26453 return REGNO (addis_reg
) == REGNO (base_reg
);
26456 /* During the peephole2 pass, adjust and expand the insns for a load fusion
26457 sequence. We adjust the addis register to use the target register. If the
26458 load sign extends, we adjust the code to do the zero extending load, and an
26459 explicit sign extension later since the fusion only covers zero extending
26463 operands[0] register set with addis (to be replaced with target)
26464 operands[1] value set via addis
26465 operands[2] target register being loaded
26466 operands[3] D-form memory reference using operands[0]. */
26469 expand_fusion_gpr_load (rtx
*operands
)
26471 rtx addis_value
= operands
[1];
26472 rtx target
= operands
[2];
26473 rtx orig_mem
= operands
[3];
26474 rtx new_addr
, new_mem
, orig_addr
, offset
;
26475 enum rtx_code plus_or_lo_sum
;
26476 machine_mode target_mode
= GET_MODE (target
);
26477 machine_mode extend_mode
= target_mode
;
26478 machine_mode ptr_mode
= Pmode
;
26479 enum rtx_code extend
= UNKNOWN
;
26481 if (GET_CODE (orig_mem
) == ZERO_EXTEND
26482 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
26484 extend
= GET_CODE (orig_mem
);
26485 orig_mem
= XEXP (orig_mem
, 0);
26486 target_mode
= GET_MODE (orig_mem
);
26489 gcc_assert (MEM_P (orig_mem
));
26491 orig_addr
= XEXP (orig_mem
, 0);
26492 plus_or_lo_sum
= GET_CODE (orig_addr
);
26493 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
26495 offset
= XEXP (orig_addr
, 1);
26496 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
26497 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
26499 if (extend
!= UNKNOWN
)
26500 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
26502 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
26503 UNSPEC_FUSION_GPR
);
26504 emit_insn (gen_rtx_SET (target
, new_mem
));
26506 if (extend
== SIGN_EXTEND
)
26508 int sub_off
= ((BYTES_BIG_ENDIAN
)
26509 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
26512 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
26514 emit_insn (gen_rtx_SET (target
,
26515 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
26521 /* Emit the addis instruction that will be part of a fused instruction
26525 emit_fusion_addis (rtx target
, rtx addis_value
)
26528 const char *addis_str
= NULL
;
26530 /* Emit the addis instruction. */
26531 fuse_ops
[0] = target
;
26532 if (satisfies_constraint_L (addis_value
))
26534 fuse_ops
[1] = addis_value
;
26535 addis_str
= "lis %0,%v1";
26538 else if (GET_CODE (addis_value
) == PLUS
)
26540 rtx op0
= XEXP (addis_value
, 0);
26541 rtx op1
= XEXP (addis_value
, 1);
26543 if (REG_P (op0
) && CONST_INT_P (op1
)
26544 && satisfies_constraint_L (op1
))
26548 addis_str
= "addis %0,%1,%v2";
26552 else if (GET_CODE (addis_value
) == HIGH
)
26554 rtx value
= XEXP (addis_value
, 0);
26555 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
26557 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
26558 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
26560 addis_str
= "addis %0,%2,%1@toc@ha";
26562 else if (TARGET_XCOFF
)
26563 addis_str
= "addis %0,%1@u(%2)";
26566 gcc_unreachable ();
26569 else if (GET_CODE (value
) == PLUS
)
26571 rtx op0
= XEXP (value
, 0);
26572 rtx op1
= XEXP (value
, 1);
26574 if (GET_CODE (op0
) == UNSPEC
26575 && XINT (op0
, 1) == UNSPEC_TOCREL
26576 && CONST_INT_P (op1
))
26578 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
26579 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
26582 addis_str
= "addis %0,%2,%1+%3@toc@ha";
26584 else if (TARGET_XCOFF
)
26585 addis_str
= "addis %0,%1+%3@u(%2)";
26588 gcc_unreachable ();
26592 else if (satisfies_constraint_L (value
))
26594 fuse_ops
[1] = value
;
26595 addis_str
= "lis %0,%v1";
26598 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
26600 fuse_ops
[1] = value
;
26601 addis_str
= "lis %0,%1@ha";
26606 fatal_insn ("Could not generate addis value for fusion", addis_value
);
26608 output_asm_insn (addis_str
, fuse_ops
);
26611 /* Emit a D-form load or store instruction that is the second instruction
26612 of a fusion sequence. */
26615 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
26618 char insn_template
[80];
26620 fuse_ops
[0] = load_reg
;
26621 fuse_ops
[1] = addis_reg
;
26623 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
26625 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
26626 fuse_ops
[2] = offset
;
26627 output_asm_insn (insn_template
, fuse_ops
);
26630 else if (GET_CODE (offset
) == UNSPEC
26631 && XINT (offset
, 1) == UNSPEC_TOCREL
)
26634 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
26636 else if (TARGET_XCOFF
)
26637 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
26640 gcc_unreachable ();
26642 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
26643 output_asm_insn (insn_template
, fuse_ops
);
26646 else if (GET_CODE (offset
) == PLUS
26647 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
26648 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
26649 && CONST_INT_P (XEXP (offset
, 1)))
26651 rtx tocrel_unspec
= XEXP (offset
, 0);
26653 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
26655 else if (TARGET_XCOFF
)
26656 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
26659 gcc_unreachable ();
26661 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
26662 fuse_ops
[3] = XEXP (offset
, 1);
26663 output_asm_insn (insn_template
, fuse_ops
);
26666 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
26668 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
26670 fuse_ops
[2] = offset
;
26671 output_asm_insn (insn_template
, fuse_ops
);
26675 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
26680 /* Given an address, convert it into the addis and load offset parts. Addresses
26681 created during the peephole2 process look like:
26682 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
26683 (unspec [(...)] UNSPEC_TOCREL)) */
26686 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
26690 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
26692 hi
= XEXP (addr
, 0);
26693 lo
= XEXP (addr
, 1);
26696 gcc_unreachable ();
26702 /* Return a string to fuse an addis instruction with a gpr load to the same
26703 register that we loaded up the addis instruction. The address that is used
26704 is the logical address that was formed during peephole2:
26705 (lo_sum (high) (low-part))
26707 The code is complicated, so we call output_asm_insn directly, and just
26711 emit_fusion_gpr_load (rtx target
, rtx mem
)
26716 const char *load_str
= NULL
;
26719 if (GET_CODE (mem
) == ZERO_EXTEND
)
26720 mem
= XEXP (mem
, 0);
26722 gcc_assert (REG_P (target
) && MEM_P (mem
));
26724 addr
= XEXP (mem
, 0);
26725 fusion_split_address (addr
, &addis_value
, &load_offset
);
26727 /* Now emit the load instruction to the same register. */
26728 mode
= GET_MODE (mem
);
26746 gcc_assert (TARGET_POWERPC64
);
26751 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
26754 /* Emit the addis instruction. */
26755 emit_fusion_addis (target
, addis_value
);
26757 /* Emit the D-form load instruction. */
26758 emit_fusion_load (target
, target
, load_offset
, load_str
);
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  These are
   lazily built (once) by rs6000_atomic_assign_expand_fenv for the soft-float
   path that calls out to glibc's atomic fenv helpers.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
26769 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
26772 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
26774 if (!TARGET_HARD_FLOAT
)
26776 #ifdef RS6000_GLIBC_ATOMIC_FENV
26777 if (atomic_hold_decl
== NULL_TREE
)
26780 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
26781 get_identifier ("__atomic_feholdexcept"),
26782 build_function_type_list (void_type_node
,
26783 double_ptr_type_node
,
26785 TREE_PUBLIC (atomic_hold_decl
) = 1;
26786 DECL_EXTERNAL (atomic_hold_decl
) = 1;
26789 if (atomic_clear_decl
== NULL_TREE
)
26792 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
26793 get_identifier ("__atomic_feclearexcept"),
26794 build_function_type_list (void_type_node
,
26796 TREE_PUBLIC (atomic_clear_decl
) = 1;
26797 DECL_EXTERNAL (atomic_clear_decl
) = 1;
26800 tree const_double
= build_qualified_type (double_type_node
,
26802 tree const_double_ptr
= build_pointer_type (const_double
);
26803 if (atomic_update_decl
== NULL_TREE
)
26806 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
26807 get_identifier ("__atomic_feupdateenv"),
26808 build_function_type_list (void_type_node
,
26811 TREE_PUBLIC (atomic_update_decl
) = 1;
26812 DECL_EXTERNAL (atomic_update_decl
) = 1;
26815 tree fenv_var
= create_tmp_var_raw (double_type_node
);
26816 TREE_ADDRESSABLE (fenv_var
) = 1;
26817 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
,
26818 build4 (TARGET_EXPR
, double_type_node
, fenv_var
,
26819 void_node
, NULL_TREE
, NULL_TREE
));
26821 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
26822 *clear
= build_call_expr (atomic_clear_decl
, 0);
26823 *update
= build_call_expr (atomic_update_decl
, 1,
26824 fold_convert (const_double_ptr
, fenv_addr
));
26829 tree mffs
= rs6000_builtin_decls
[RS6000_BUILTIN_MFFS
];
26830 tree mtfsf
= rs6000_builtin_decls
[RS6000_BUILTIN_MTFSF
];
26831 tree call_mffs
= build_call_expr (mffs
, 0);
26833 /* Generates the equivalent of feholdexcept (&fenv_var)
26835 *fenv_var = __builtin_mffs ();
26837 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26838 __builtin_mtfsf (0xff, fenv_hold); */
26840 /* Mask to clear everything except for the rounding modes and non-IEEE
26841 arithmetic flag. */
26842 const unsigned HOST_WIDE_INT hold_exception_mask
26843 = HOST_WIDE_INT_C (0xffffffff00000007);
26845 tree fenv_var
= create_tmp_var_raw (double_type_node
);
26847 tree hold_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_var
, call_mffs
,
26848 NULL_TREE
, NULL_TREE
);
26850 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
26851 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
26852 build_int_cst (uint64_type_node
,
26853 hold_exception_mask
));
26855 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
26858 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
26859 build_int_cst (unsigned_type_node
, 0xff),
26862 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
26864 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26866 double fenv_clear = __builtin_mffs ();
26867 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
26868 __builtin_mtfsf (0xff, fenv_clear); */
26870 /* Mask to clear everything except for the rounding modes and non-IEEE
26871 arithmetic flag. */
26872 const unsigned HOST_WIDE_INT clear_exception_mask
26873 = HOST_WIDE_INT_C (0xffffffff00000000);
26875 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
26877 tree clear_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_clear
,
26878 call_mffs
, NULL_TREE
, NULL_TREE
);
26880 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
26881 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
26883 build_int_cst (uint64_type_node
,
26884 clear_exception_mask
));
26886 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
26887 fenv_clear_llu_and
);
26889 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
26890 build_int_cst (unsigned_type_node
, 0xff),
26893 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
26895 /* Generates the equivalent of feupdateenv (&fenv_var)
26897 double old_fenv = __builtin_mffs ();
26898 double fenv_update;
26899 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
26900 (*(uint64_t*)fenv_var 0x1ff80fff);
26901 __builtin_mtfsf (0xff, fenv_update); */
26903 const unsigned HOST_WIDE_INT update_exception_mask
26904 = HOST_WIDE_INT_C (0xffffffff1fffff00);
26905 const unsigned HOST_WIDE_INT new_exception_mask
26906 = HOST_WIDE_INT_C (0x1ff80fff);
26908 tree old_fenv
= create_tmp_var_raw (double_type_node
);
26909 tree update_mffs
= build4 (TARGET_EXPR
, double_type_node
, old_fenv
,
26910 call_mffs
, NULL_TREE
, NULL_TREE
);
26912 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
26913 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
26914 build_int_cst (uint64_type_node
,
26915 update_exception_mask
));
26917 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
26918 build_int_cst (uint64_type_node
,
26919 new_exception_mask
));
26921 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
26922 old_llu_and
, new_llu_and
);
26924 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
26927 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
26928 build_int_cst (unsigned_type_node
, 0xff),
26929 fenv_update_mtfsf
);
26931 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
26935 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
26937 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
26939 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
26940 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
26942 /* The destination of the vmrgew instruction layout is:
26943 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26944 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26945 vmrgew instruction will be correct. */
26946 if (BYTES_BIG_ENDIAN
)
26948 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
26950 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
26955 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
26956 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
26959 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
26960 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
26962 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
26963 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
26965 if (BYTES_BIG_ENDIAN
)
26966 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
26968 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
26972 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
26974 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
26976 rtx_tmp0
= gen_reg_rtx (V2DImode
);
26977 rtx_tmp1
= gen_reg_rtx (V2DImode
);
26979 /* The destination of the vmrgew instruction layout is:
26980 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26981 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26982 vmrgew instruction will be correct. */
26983 if (BYTES_BIG_ENDIAN
)
26985 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
26986 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
26990 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
26991 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
26994 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
26995 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
26997 if (signed_convert
)
26999 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
27000 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
27004 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
27005 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
27008 if (BYTES_BIG_ENDIAN
)
27009 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
27011 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
27015 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
27018 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
27020 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
27021 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
27023 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
27024 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
27026 rtx_tmp2
= gen_reg_rtx (V4SImode
);
27027 rtx_tmp3
= gen_reg_rtx (V4SImode
);
27029 if (signed_convert
)
27031 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
27032 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
27036 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
27037 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
27040 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
27043 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27046 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
27047 optimization_type opt_type
)
27052 return (opt_type
== OPTIMIZE_FOR_SPEED
27053 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
27060 /* Implement TARGET_CONSTANT_ALIGNMENT. */
27062 static HOST_WIDE_INT
27063 rs6000_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
27065 if (TREE_CODE (exp
) == STRING_CST
27066 && (STRICT_ALIGNMENT
|| !optimize_size
))
27067 return MAX (align
, BITS_PER_WORD
);
27071 /* Implement TARGET_STARTING_FRAME_OFFSET. */
27073 static HOST_WIDE_INT
27074 rs6000_starting_frame_offset (void)
27076 if (FRAME_GROWS_DOWNWARD
)
27078 return RS6000_STARTING_FRAME_OFFSET
;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* For C++ symbols, also emit a weak alias under the old (GCC 8.1)
     IEEE 128-bit mangling so previously compiled code still links.  */
  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      /* Temporarily re-mangle the decl with the old scheme to recover the
	 old symbol name, then restore the current assembler name.  */
      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif
27115 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
27116 function names from <foo>l to <foo>f128 if the default long double type is
27117 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
27118 include file switches the names on systems that support long double as IEEE
27119 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
27120 In the future, glibc will export names like __ieee128_sinf128 and we can
27121 switch to using those instead of using sinf128, which pollutes the user's
27124 This will switch the names for Fortran math functions as well (which doesn't
27125 use math.h). However, Fortran needs other changes to the compiler and
27126 library before you can switch the real*16 type at compile time.
27128 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
27129 only do this if the default is that long double is IBM extended double, and
27130 the user asked for IEEE 128-bit. */
27133 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
27135 if (!TARGET_IEEEQUAD_DEFAULT
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
27136 && TREE_CODE (decl
) == FUNCTION_DECL
27137 && DECL_IS_UNDECLARED_BUILTIN (decl
))
27139 size_t len
= IDENTIFIER_LENGTH (id
);
27140 const char *name
= IDENTIFIER_POINTER (id
);
27142 if (name
[len
- 1] == 'l')
27144 bool uses_ieee128_p
= false;
27145 tree type
= TREE_TYPE (decl
);
27146 machine_mode ret_mode
= TYPE_MODE (type
);
27148 /* See if the function returns a IEEE 128-bit floating point type or
27150 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
27151 uses_ieee128_p
= true;
27154 function_args_iterator args_iter
;
27157 /* See if the function passes a IEEE 128-bit floating point type
27158 or complex type. */
27159 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
27161 machine_mode arg_mode
= TYPE_MODE (arg
);
27162 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
27164 uses_ieee128_p
= true;
27170 /* If we passed or returned an IEEE 128-bit floating point type,
27171 change the name. */
27172 if (uses_ieee128_p
)
27174 char *name2
= (char *) alloca (len
+ 4);
27175 memcpy (name2
, name
, len
- 1);
27176 strcpy (name2
+ len
- 1, "f128");
27177 id
= get_identifier (name2
);
27185 /* Predict whether the given loop in gimple will be transformed in the RTL
27186 doloop_optimize pass. */
27189 rs6000_predict_doloop_p (struct loop
*loop
)
27193 /* On rs6000, targetm.can_use_doloop_p is actually
27194 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
27195 if (loop
->inner
!= NULL
)
27197 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
27198 fprintf (dump_file
, "Predict doloop failure due to"
27199 " loop nesting.\n");
27206 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
27209 rs6000_cannot_substitute_mem_equiv_p (rtx mem
)
27211 gcc_assert (MEM_P (mem
));
27213 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
27214 type addresses, so don't allow MEMs with those address types to be
27215 substituted as an equivalent expression. See PR93974 for details. */
27216 if (GET_CODE (XEXP (mem
, 0)) == AND
)
27222 /* Implement TARGET_INVALID_CONVERSION. */
27224 static const char *
27225 rs6000_invalid_conversion (const_tree fromtype
, const_tree totype
)
27227 /* Make sure we're working with the canonical types. */
27228 if (TYPE_CANONICAL (fromtype
) != NULL_TREE
)
27229 fromtype
= TYPE_CANONICAL (fromtype
);
27230 if (TYPE_CANONICAL (totype
) != NULL_TREE
)
27231 totype
= TYPE_CANONICAL (totype
);
27233 machine_mode frommode
= TYPE_MODE (fromtype
);
27234 machine_mode tomode
= TYPE_MODE (totype
);
27236 if (frommode
!= tomode
)
27238 /* Do not allow conversions to/from XOmode and OOmode types. */
27239 if (frommode
== XOmode
)
27240 return N_("invalid conversion from type %<__vector_quad%>");
27241 if (tomode
== XOmode
)
27242 return N_("invalid conversion to type %<__vector_quad%>");
27243 if (frommode
== OOmode
)
27244 return N_("invalid conversion from type %<__vector_pair%>");
27245 if (tomode
== OOmode
)
27246 return N_("invalid conversion to type %<__vector_pair%>");
27248 else if (POINTER_TYPE_P (fromtype
) && POINTER_TYPE_P (totype
))
27250 /* We really care about the modes of the base types. */
27251 frommode
= TYPE_MODE (TREE_TYPE (fromtype
));
27252 tomode
= TYPE_MODE (TREE_TYPE (totype
));
27254 /* Do not allow conversions to/from XOmode and OOmode pointer
27255 types, except to/from void pointers. */
27256 if (frommode
!= tomode
27257 && frommode
!= VOIDmode
27258 && tomode
!= VOIDmode
)
27260 if (frommode
== XOmode
)
27261 return N_("invalid conversion from type %<* __vector_quad%>");
27262 if (tomode
== XOmode
)
27263 return N_("invalid conversion to type %<* __vector_quad%>");
27264 if (frommode
== OOmode
)
27265 return N_("invalid conversion from type %<* __vector_pair%>");
27266 if (tomode
== OOmode
)
27267 return N_("invalid conversion to type %<* __vector_pair%>");
27271 /* Conversion allowed. */
27276 rs6000_const_f32_to_i32 (rtx operand
)
27279 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (operand
);
27281 gcc_assert (GET_MODE (operand
) == SFmode
);
27282 REAL_VALUE_TO_TARGET_SINGLE (*rv
, value
);
27287 rs6000_emit_xxspltidp_v2df (rtx dst
, long value
)
27289 if (((value
& 0x7F800000) == 0) && ((value
& 0x7FFFFF) != 0))
27290 inform (input_location
,
27291 "the result for the xxspltidp instruction "
27292 "is undefined for subnormal input values");
27293 emit_insn( gen_xxspltidp_v2df_inst (dst
, GEN_INT (value
)));
27296 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
27299 rs6000_gen_pic_addr_diff_vec (void)
27301 return rs6000_relative_jumptables
;
27305 rs6000_output_addr_vec_elt (FILE *file
, int value
)
27307 const char *directive
= TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t";
27310 fprintf (file
, "%s", directive
);
27311 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", value
);
27312 assemble_name (file
, buf
);
27313 fprintf (file
, "\n");
/* Define the target hook vector; TARGET_INITIALIZER picks up all the
   TARGET_* macro overrides defined above and in the included headers.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"