/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2021 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "tree-vector-builder.h"
#include "tree-pass.h"
#include "xcoffout.h"		/* get declarations of xcoff_*_section_name */
#include "case-cfn-macros.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS	0
#endif
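/* Subtargets that do support PC-relative addressing (e.g. 64-bit ELFv2
   Linux) override PCREL_SUPPORTED_BY_OS in their own headers before this
   point; the 0 above is only the fallback.  (Explanatory note: the real
   definitions live in the subtarget headers such as linux64.h.)  */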
/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;
/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;
/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;
/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];
/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];
/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
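/* Worked example (illustrative only): -mrecip=divf,rsqrtd accumulates
   RECIP_SF_DIV | RECIP_V4SF_DIV | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT,
   i.e. 0x005 | 0x0a0 == 0x0a5, which the option-handling code then
   spreads into rs6000_recip_bits for the affected scalar and vector
   modes.  */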
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
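/* So, for example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true while
   IS_STD_REG_TYPE (CR_REG_TYPE) is false, and IS_FP_VECT_REG_TYPE accepts
   exactly the VSX/Altivec/FPR subrange.  (Illustrative note, assuming the
   enum ordering above.)  */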
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along with an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};
static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;
#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
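/* For instance, a mode that is valid in a register class and supports both
   reg+reg and reg+offset addressing there would carry an addr_mask of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d.
   (Illustrative only; the real masks are computed during target
   initialization.)  */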
/* Per-mode addressing information: reload handlers, plus the mask of valid
   addressing modes for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}
/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}
/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
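/* A typical use of these predicates (sketch, not code from this file):

     if (mode_supports_pre_incdec_p (mode))
       ... accept (pre_inc:MODE (reg)) and (pre_dec:MODE (reg)) ...

   The real callers are the legitimate-address and secondary-reload
   routines later in this file.  */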
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx in_set, out_set;
  rtx in_pat, out_pat;
  rtx in_exp, out_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
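/* This predicate is meant to be named as the guard of define_bypass
   entries in the scheduling descriptions, e.g. (sketch):

     (define_bypass 2 "producer-insn" "consumer-store"
		    "rs6000_store_data_bypass_p")

   A conservative false answer here only loses a scheduling optimization;
   it never affects correctness.  */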
/* Processor costs (relative to an add) */
static const struct processor_costs *rs6000_cost;
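/* COSTS_N_INSNS (N) converts "N instructions" into the units the
   middle-end cost hooks expect, so an entry such as COSTS_N_INSNS (18)
   for divsi below reads as "a 32-bit divide costs roughly 18 adds".  */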
/* Instruction size costs on 32-bit processors.  */
static struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on MPCCORE processors.  */
static struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC476 processors.  */
static struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC604e processors.  */
static struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE5500 processors.  */
static struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER7 processors.  */
static struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static tree get_prev_label (tree);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t, reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit machines.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif
#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  rs6000_hard_regno_call_part_clobbered

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
#if TARGET_ELF && RS6000_WEAK
#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
#endif
1743 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1744 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1746 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1747 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1749 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1750 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1751 rs6000_cannot_substitute_mem_equiv_p
1753 #undef TARGET_INVALID_CONVERSION
1754 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt
  const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};
/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}
/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
     registers.  */
  if (mode == OOmode)
    return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);

  /* MMA accumulator modes need FPR registers divisible by 4.  */
  if (mode == XOmode)
    return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     asked for it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || VECTOR_ALIGNMENT_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || mode == TImode
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (VECTOR_ALIGNMENT_P (mode))
	return 0;

      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  if (TARGET_P8_VECTOR && (mode == SImode))
	    return 1;

	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
	    return 1;
	}

      return 0;
    }

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec only in Altivec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* We cannot put non-VSX TImode or PTImode anywhere except general register
     and it must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
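
/* The even/odd and mod-4 placement rules above reduce to low-bit masks:
   (regno & 1) == 0 accepts only even registers (the start of a vector
   pair), and (regno & 3) == 0 accepts only registers divisible by 4
   (the start of an MMA accumulator group).  So, for instance, VSX
   register 34 can begin a vector pair but not an accumulator.  */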
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}
/* Implement TARGET_MODES_TIEABLE_P.

   PTImode cannot tie with other modes because PTImode is restricted to even
   GPR registers, and TImode can go in any GPR as well as VSX registers (PR
   57744).

   Similarly, don't allow OOmode (vector pair, restricted to even VSX
   registers) or XOmode (vector quad, restricted to FPR registers divisible
   by 4) to tie with other modes.

   Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
   128-bit floating point on VSX systems ties with other vectors.  */

static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
    return mode1 == mode2;

  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
    return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode1))
    return SCALAR_FLOAT_MODE_P (mode2);
  if (SCALAR_FLOAT_MODE_P (mode2))
    return false;

  if (GET_MODE_CLASS (mode1) == MODE_CC)
    return GET_MODE_CLASS (mode2) == MODE_CC;
  if (GET_MODE_CLASS (mode2) == MODE_CC)
    return false;

  return true;
}
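
/* A quick illustration of the relation (illustrative only): with VSX
   enabled, V2DFmode and V4SFmode tie with each other because both pass
   ALTIVEC_OR_VSX_VECTOR_MODE, while PTImode, OOmode and XOmode each tie
   only with themselves because of the early exit above.  The relation
   is symmetric by construction; both arguments see the same sequence
   of tests.  */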
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */

static bool
rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
				       machine_mode mode)
{
  if (TARGET_32BIT
      && TARGET_POWERPC64
      && GET_MODE_SIZE (mode) > 4
      && INT_REGNO_P (regno))
    return true;

  if (TARGET_VSX
      && FP_REGNO_P (regno)
      && GET_MODE_SIZE (mode) > 8
      && !FLOAT128_2REG_P (mode))
    return true;

  return false;
}
/* Print interesting facts about registers.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
{
  int r, m;

  for (r = first_regno; r <= last_regno; ++r)
    {
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	  {
	    if (len > 70)
	      {
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";
	      }

	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			      rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";
	  }

      if (call_used_or_fixed_reg_p (r))
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";
	}

      if (fixed_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";
	}

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);
      comma = ", ";

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      fprintf (stderr, "%sregno = %d\n", comma, r);
    }
}
static const char *
rs6000_debug_vector_unit (enum rs6000_vector v)
{
  const char *ret;

  switch (v)
    {
    case VECTOR_NONE:	   ret = "none";      break;
    case VECTOR_ALTIVEC:   ret = "altivec";   break;
    case VECTOR_VSX:	   ret = "vsx";       break;
    case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
    default:		   ret = "unknown";   break;
    }

  return ret;
}
/* Inner function printing just the address mask for a particular reload
   register class.  */
DEBUG_FUNCTION char *
rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
{
  static char ret[8];
  char *p = ret;

  if ((mask & RELOAD_REG_VALID) != 0)
    *p++ = 'v';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_MULTIPLE) != 0)
    *p++ = 'm';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_INDEXED) != 0)
    *p++ = 'i';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
    *p++ = 'O';
  else if ((mask & RELOAD_REG_OFFSET) != 0)
    *p++ = 'o';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_AND_M16) != 0)
    *p++ = '&';
  else if (keep_spaces)
    *p++ = ' ';

  *p = '\0';

  return ret;
}
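
/* For example, a mask with just VALID, INDEXED and OFFSET set comes back
   roughly as "v io" with keep_spaces (absent flags padded with blanks so
   the columns line up across modes) and as "vio" without.  The letters
   follow the tests above: v/m/i, then O or o, '+' twice, and '&'.  */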
/* Print the address masks in a human readable fashion.  */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
{
  ssize_t rc;
  int spaces = 0;

  fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
  for (rc = 0; rc < N_RELOAD_REG; rc++)
    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));

  if ((reg_addr[m].reload_store != CODE_FOR_nothing)
      || (reg_addr[m].reload_load != CODE_FOR_nothing))
    {
      fprintf (stderr, "%*s  Reload=%c%c", spaces, "",
	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
      spaces = 0;
    }
  else
    spaces += strlen ("  Reload=sl");

  if (reg_addr[m].scalar_in_vmx_p)
    {
      fprintf (stderr, "%*s  Upper=y", spaces, "");
      spaces = 0;
    }
  else
    spaces += strlen ("  Upper=y");

  if (rs6000_vector_unit[m] != VECTOR_NONE
      || rs6000_vector_mem[m] != VECTOR_NONE)
    {
      fprintf (stderr, "%*s  vector: arith=%-10s mem=%s",
	       spaces, "",
	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));
      spaces = 0;
    }

  fputs ("\n", stderr);
}
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"
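
/* These macros only build printf format strings with a 32-column,
   left-justified key.  A minimal standalone sketch of the layout
   (illustrative only, not part of the build):

     #include <stdio.h>

     int
     main (void)
     {
       printf ("%-32s= %s\n", "abi", "ELFv2");   -- like DEBUG_FMT_S
       printf ("%-32s= %d\n", "tls_size", 32);   -- like DEBUG_FMT_D
       return 0;
     }

   which prints two "key = value" lines with the '=' signs aligned.  */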
/* Print various interesting information with -mdebug=reg.  */
static void
rs6000_debug_reg_global (void)
{
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  size_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
  /* Virtual regs we are interested in.  */
  const static struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM,			"stack pointer:" },
    { TOC_REGNUM,				"toc:          " },
    { STATIC_CHAIN_REGNUM,			"static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
    { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
    { ARG_POINTER_REGNUM,			"arg pointer:  " },
    { FRAME_POINTER_REGNUM,			"frame pointer:" },
    { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
    { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
    { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
    { LAST_VIRTUAL_REGISTER,			"last virtual: " },
  };
  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name,
	     virtual_regs[v].regno);
2294 "d reg_class = %s\n"
2295 "f reg_class = %s\n"
2296 "v reg_class = %s\n"
2297 "wa reg_class = %s\n"
2298 "we reg_class = %s\n"
2299 "wr reg_class = %s\n"
2300 "wx reg_class = %s\n"
2301 "wA reg_class = %s\n"
2303 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2304 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2305 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2306 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2307 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2308 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2309 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2310 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2313 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2314 rs6000_debug_print_mode (m
);
2316 fputs ("\n", stderr
);
  for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
    {
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
	{
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
	    {
	      if (first_time)
		{
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;
		}

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
	    }
	}

      if (!first_time)
	fputs ("\n", stderr);
    }

  if (nl)
    fputs (nl, stderr);
  if (rs6000_recip_control)
    {
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	  {
	    fprintf (stderr,
		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		     GET_MODE_NAME (m),
		     (RS6000_RECIP_AUTO_RE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
	  }

      fputs ("\n", stderr);
    }
  if (rs6000_cpu_index >= 0)
    {
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
    {
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options, &global_options_set);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
				rs6000_builtin_mask);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
  switch (rs6000_sched_costly_dep)
    {
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    {
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
  switch (rs6000_sdata)
    {
    default:
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;
    }
  switch (rs6000_traceback)
    {
    case traceback_default:	trace_str = "default";	break;
    case traceback_none:	trace_str = "none";	break;
    case traceback_part:	trace_str = "part";	break;
    case traceback_full:	trace_str = "full";	break;
    default:			trace_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    {
    case CMODEL_SMALL:	cmodel_str = "small";	break;
    case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
    case CMODEL_LARGE:	cmodel_str = "large";	break;
    default:		cmodel_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    {
    case ABI_NONE:	abi_str = "none";	break;
    case ABI_AIX:	abi_str = "aix";	break;
    case ABI_ELFv2:	abi_str = "ELFv2";	break;
    case ABI_V4:	abi_str = "V4";		break;
    case ABI_DARWIN:	abi_str = "darwin";	break;
    default:		abi_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);

  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  if (TARGET_P8_FUSION)
    {
      char options[80];

      strcpy (options, "power8");
      if (TARGET_P8_FUSION_SIGN)
	strcat (options, ", sign");

      fprintf (stderr, DEBUG_FMT_S, "fusion", options);
    }

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  if (rs6000_long_double_type_size > 64)
    {
      fprintf (stderr, DEBUG_FMT_S, "long double type",
	       TARGET_IEEEQUAD ? "IEEE" : "IBM");
      fprintf (stderr, DEBUG_FMT_S, "default long double type",
	       TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
    }
  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);
  fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
	   (int)RS6000_BUILTIN_COUNT);

  fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
	   (int)TARGET_FLOAT128_ENABLE_TYPE);
  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);

  if (TARGET_DIRECT_MOVE_128)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
}
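
/* To see all of the above at once, compile any translation unit with
   -mdebug=reg (illustrative invocation; the file name is an assumption):

     gcc -c -mcpu=power9 -mdebug=reg foo.c

   which dumps the hard register tables, per-mode address masks, tieable
   mode sets, and the selected ABI/cmodel/scheduling options to stderr.  */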
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate address support figure out the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
{
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    {
      machine_mode m2 = (machine_mode) m;
      bool complex_p = false;
      bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
      size_t msize;

      if (COMPLEX_MODE_P (m2))
	{
	  complex_p = true;
	  m2 = GET_MODE_INNER (m2);
	}

      msize = GET_MODE_SIZE (m2);

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	{
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	    {
	      bool small_int_vsx_p = (small_int_p
				      && (rc == RELOAD_REG_FPR
					  || rc == RELOAD_REG_VMX));

	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  Small integers in VSX registers can only do
		 REG+REG addressing.  */
	      if (small_int_vsx_p)
		addr_mask |= RELOAD_REG_INDEXED;
	      else if (nregs > 1 || m == BLKmode || complex_p)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  If we allow scalars into Altivec registers,
		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.

		 For VSX systems, we don't allow update addressing for
		 DFmode/SFmode if those registers can go in both the
		 traditional floating point registers and Altivec registers.
		 The load/store instructions for the Altivec registers do not
		 have update forms.  If we allowed update addressing, it seems
		 to break IV-OPT code using floating point if the index type is
		 int instead of long (PR target/81550 and target/84042).  */

	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && msize <= 8
		  && !VECTOR_MODE_P (m2)
		  && !VECTOR_ALIGNMENT_P (m2)
		  && !complex_p
		  && (m != E_DFmode || !TARGET_VSX)
		  && (m != E_SFmode || !TARGET_P8_VECTOR)
		  && !small_int_vsx_p)
		{
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    {
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DFmode:
		    case E_DDmode:
		      if (TARGET_HARD_FLOAT)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;
		    }
		}
	    }

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && msize <= 8
	      && (rc == RELOAD_REG_GPR
		  || ((msize == 8 || m2 == SFmode)
		      && (rc == RELOAD_REG_FPR
			  || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
	     instructions are enabled.  The offset for 128-bit VSX registers is
	     only 12-bits.  While GPRs can handle the full offset range, VSX
	     registers can only handle the restricted range.  */
	  else if ((addr_mask != 0) && !indexed_only_p
		   && msize == 16 && TARGET_P9_VECTOR
		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
		       || (m2 == TImode && TARGET_VSX)))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		addr_mask |= RELOAD_REG_QUAD_OFFSET;
	    }

	  /* Vector pairs can do both indexed and offset loads if the
	     instructions are enabled, otherwise they can only do offset loads
	     since it will be broken into two vector moves.  Vector quads can
	     only do offset loads.  */
	  else if ((addr_mask != 0) && TARGET_MMA
		   && (m2 == OOmode || m2 == XOmode))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		{
		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
		  if (m2 == OOmode)
		    addr_mask |= RELOAD_REG_INDEXED;
		}
	    }

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && msize == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;
	}

      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
    }
}
/* Initialize the various global tables that are based on register size.  */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
  ssize_t r, m, c;
  int align64;
  int align32;

  /* Precalculate REGNO_REG_CLASS.  */
  rs6000_regno_regclass[0] = GENERAL_REGS;
  for (r = 1; r < 32; ++r)
    rs6000_regno_regclass[r] = BASE_REGS;

  for (r = 32; r < 64; ++r)
    rs6000_regno_regclass[r] = FLOAT_REGS;

  for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
    rs6000_regno_regclass[r] = NO_REGS;

  for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
    rs6000_regno_regclass[r] = ALTIVEC_REGS;

  rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
  for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
    rs6000_regno_regclass[r] = CR_REGS;

  rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
  rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
  rs6000_regno_regclass[CA_REGNO] = NO_REGS;
  rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
  rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
  /* Precalculate register class to simpler reload register class.  We don't
     need all of the register classes that are combinations of different
     classes, just the simple ones that have constraint letters.  */
  for (c = 0; c < N_REG_CLASSES; c++)
    reg_class_to_reg_type[c] = NO_REG_TYPE;

  reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
  reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;

  if (TARGET_VSX)
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
    }
  else
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
    }
  /* Precalculate the valid memory formats as well as the vector information,
     this must be set up before the rs6000_hard_regno_nregs_internal calls
     below.  */
  gcc_assert ((int)VECTOR_NONE == 0);
  memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
  memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));

  gcc_assert ((int)CODE_FOR_nothing == 0);
  memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));

  gcc_assert ((int)NO_REGS == 0);
  memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));

  /* The VSX hardware allows native alignment for vectors, but control
     whether the compiler believes it can use native alignment or still uses
     128-bit alignment.  */
  if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
    {
      align64 = 64;
      align32 = 32;
    }
  else
    {
      align64 = 128;
      align32 = 128;
    }
  /* KF mode (IEEE 128-bit in VSX registers).  We do not have arithmetic, so
     only set the memory modes.  Include TFmode if -mabi=ieeelongdouble.  */
  if (TARGET_FLOAT128_TYPE)
    {
      rs6000_vector_mem[KFmode] = VECTOR_VSX;
      rs6000_vector_align[KFmode] = 128;

      if (FLOAT128_IEEE_P (TFmode))
	{
	  rs6000_vector_mem[TFmode] = VECTOR_VSX;
	  rs6000_vector_align[TFmode] = 128;
	}
    }
  /* V2DF mode, VSX only.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
      rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
      rs6000_vector_align[V2DFmode] = align64;
    }

  /* V4SF mode, either VSX or Altivec.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
      rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
      rs6000_vector_align[V4SFmode] = align32;
    }
  else if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SFmode] = align32;
    }
  /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
     and stores.  */
  if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SImode] = align32;
      rs6000_vector_align[V8HImode] = align32;
      rs6000_vector_align[V16QImode] = align32;

      if (TARGET_VSX)
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
	}
      else
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
	}
    }
  /* V2DImode, full mode depends on ISA 2.07 vector mode.  Allow under VSX to
     do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[V2DImode] = VECTOR_VSX;
      rs6000_vector_unit[V2DImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V2DImode] = align64;

      rs6000_vector_mem[V1TImode] = VECTOR_VSX;
      rs6000_vector_unit[V1TImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V1TImode] = 128;
    }
  /* DFmode, see if we want to use the VSX unit.  Memory is handled
     differently, so don't set rs6000_vector_mem.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[DFmode] = VECTOR_VSX;
      rs6000_vector_align[DFmode] = 64;
    }

  /* SFmode, see if we want to use the VSX unit.  */
  if (TARGET_P8_VECTOR)
    {
      rs6000_vector_unit[SFmode] = VECTOR_VSX;
      rs6000_vector_align[SFmode] = 32;
    }

  /* Allow TImode in VSX register and set the VSX memory macros.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[TImode] = VECTOR_VSX;
      rs6000_vector_align[TImode] = align64;
    }

  /* Add support for vector pairs and vector quad registers.  */
  if (TARGET_MMA)
    {
      rs6000_vector_unit[OOmode] = VECTOR_NONE;
      rs6000_vector_mem[OOmode] = VECTOR_VSX;
      rs6000_vector_align[OOmode] = 256;

      rs6000_vector_unit[XOmode] = VECTOR_NONE;
      rs6000_vector_mem[XOmode] = VECTOR_VSX;
      rs6000_vector_align[XOmode] = 512;
    }
  /* Register class constraints for the constraints that depend on compile
     switches.  When the VSX code was added, different constraints were added
     based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
     of the VSX registers are used.  The register classes for scalar floating
     point types is set, based on whether we allow that type into the upper
     (Altivec) registers.  GCC has register classes to target the Altivec
     registers for load/store operations, to select using a VSX memory
     operation instead of the traditional floating point operation.  The
     constraints are:

	d  - Register class to use with traditional DFmode instructions.
	f  - Register class to use with traditional SFmode instructions.
	v  - Altivec register.
	wa - Any VSX register.
	wc - Reserved to represent individual CR bits (used in LLVM).
	wn - always NO_REGS.
	wr - GPR if 64-bit mode is permitted.
	wx - Float register if we can do 32-bit int stores.  */

  if (TARGET_HARD_FLOAT)
    {
      rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;	/* SFmode  */
      rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;	/* DFmode  */
    }

  if (TARGET_VSX)
    rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;

  /* Add conditional constraints based on various options, to allow us to
     collapse multiple insn patterns.  */
  if (TARGET_ALTIVEC)
    rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;

  if (TARGET_POWERPC64)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
    }

  if (TARGET_STFIWX)
    rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode  */

  /* Support for new direct moves (ISA 3.0 + 64bit).  */
  if (TARGET_DIRECT_MOVE_128)
    rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
  /* Set up the reload helper and direct move functions.  */
  if (TARGET_VSX || TARGET_ALTIVEC)
    {
      if (TARGET_64BIT)
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_di_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_di_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_di_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_di_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_di_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_di_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_di_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_di_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_di_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_di_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_di_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_di_load;
	    }

	  if (FLOAT128_VECTOR_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_di_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_di_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
	      reg_addr[TImode].reload_load  = CODE_FOR_reload_ti_di_load;
	    }

	  if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
	    {
	      reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
	      reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
	      reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
	      reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
	      reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
	      reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
	      reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
	      reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;

	      reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
	      reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
	      reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
	      reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
	      reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
	      reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
	      reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
	      reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;

	      if (FLOAT128_VECTOR_P (KFmode))
		{
		  reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
		  reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
		}

	      if (FLOAT128_VECTOR_P (TFmode))
		{
		  reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
		  reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
		}
	    }

	  if (TARGET_MMA)
	    {
	      reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
	      reg_addr[OOmode].reload_load  = CODE_FOR_reload_oo_di_load;
	      reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
	      reg_addr[XOmode].reload_load  = CODE_FOR_reload_xo_di_load;
	    }
	}
      else
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_si_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_si_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_si_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_si_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_si_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_si_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_si_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_si_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_si_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_si_load;
	    }

	  if (FLOAT128_IEEE_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_si_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_si_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
	      reg_addr[TImode].reload_load  = CODE_FOR_reload_ti_si_load;
	    }

	  if (TARGET_DIRECT_MOVE)
	    {
	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
	    }
	}

      if (TARGET_VSX)
	{
	  reg_addr[DFmode].scalar_in_vmx_p = true;
	  reg_addr[DImode].scalar_in_vmx_p = true;

	  if (TARGET_P8_VECTOR)
	    {
	      reg_addr[SFmode].scalar_in_vmx_p = true;
	      reg_addr[SImode].scalar_in_vmx_p = true;

	      if (TARGET_P9_VECTOR)
		{
		  reg_addr[HImode].scalar_in_vmx_p = true;
		  reg_addr[QImode].scalar_in_vmx_p = true;
		}
	    }
	}
    }
  /* Precalculate HARD_REGNO_NREGS.  */
  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_nregs[m][r]
	= rs6000_hard_regno_nregs_internal (r, (machine_mode) m);

  /* Precalculate TARGET_HARD_REGNO_MODE_OK.  */
  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_mode_ok_p[m][r]
	= rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
  /* Precalculate CLASS_MAX_NREGS sizes.  */
  for (c = 0; c < LIM_REG_CLASSES; ++c)
    {
      int reg_size;

      if (TARGET_VSX && VSX_REG_CLASS_P (c))
	reg_size = UNITS_PER_VSX_WORD;

      else if (c == ALTIVEC_REGS)
	reg_size = UNITS_PER_ALTIVEC_WORD;

      else if (c == FLOAT_REGS)
	reg_size = UNITS_PER_FP_WORD;

      else
	reg_size = UNITS_PER_WORD;

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	{
	  machine_mode m2 = (machine_mode)m;
	  int reg_size2 = reg_size;

	  /* TDmode & IBM 128-bit floating point always takes 2 registers,
	     even in VSX.  */
	  if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
	    reg_size2 = UNITS_PER_FP_WORD;

	  rs6000_class_max_nregs[m][c]
	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
	}
    }
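
/* For example, with 16-byte VSX registers, V2DFmode in VSX_REGS gives
   rs6000_class_max_nregs[V2DFmode][VSX_REGS] == 1, while the IBM
   128-bit formats (FLOAT128_2REG_P) keep the 8-byte FP word size above
   and so still occupy 2 registers.  */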
  /* Calculate which modes to automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines double precision reciprocal sqrt estimate is
     not accurate enough.  */
  memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
  if (TARGET_FRES)
    rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (TARGET_FRE)
    rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;

  if (TARGET_FRSQRTES)
    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (TARGET_FRSQRTE)
    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;

  if (rs6000_recip_control)
    {
      if (!flag_finite_math_only)
	warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
		 "-ffast-math");
      if (flag_trapping_math)
	warning (0, "%qs requires %qs or %qs", "-mrecip",
		 "-fno-trapping-math", "-ffast-math");
      if (!flag_reciprocal_math)
	warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
		 "-ffast-math");
      if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
	{
	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
	}
    }
  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate address support figure out the appropriate addressing to
     use.  */
  rs6000_setup_reg_addr_masks ();

  if (global_init_p || TARGET_DEBUG_TARGET)
    {
      if (TARGET_DEBUG_REG)
	rs6000_debug_reg_global ();

      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
	fprintf (stderr,
		 "SImode variable mult cost       = %d\n"
		 "SImode constant mult cost       = %d\n"
		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost      = %d\n"
		 "SImode division cost            = %d\n"
		 "DImode division cost            = %d\n"
		 "Simple fp operation cost        = %d\n"
		 "DFmode multiplication cost      = %d\n"
		 "SFmode division cost            = %d\n"
		 "DFmode division cost            = %d\n"
		 "cache line size                 = %d\n"
		 "l1 cache size                   = %d\n"
		 "l2 cache size                   = %d\n"
		 "simultaneous prefetches         = %d\n"
		 "\n",
		 rs6000_cost->mulsi,
		 rs6000_cost->mulsi_const,
		 rs6000_cost->mulsi_const9,
		 rs6000_cost->muldi,
		 rs6000_cost->divsi,
		 rs6000_cost->divdi,
		 rs6000_cost->fp,
		 rs6000_cost->dmul,
		 rs6000_cost->sdiv,
		 rs6000_cost->ddiv,
		 rs6000_cost->cache_line_size,
		 rs6000_cost->l1_cache_size,
		 rs6000_cost->l2_cache_size,
		 rs6000_cost->simultaneous_prefetches);
    }
}
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */
static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
    }

  /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
  if (TARGET_64BIT)
    rs6000_default_long_calls = 0;

  /* ld_classic is (so far) still used for kernel (static) code, and supports
     the JBSR longcall / branch islands.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;

      /* Allow a kext author to do -mkernel -mhard-float.  */
      if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
	rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! global_options_set.x_rs6000_cpu_index)
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif

/* Return the builtin mask of the various options used that could affect which
   builtins were used.  In the past we used target_flags, but we've run out of
   bits, and some options are no longer in target_flags.  */
HOST_WIDE_INT
rs6000_builtin_mask_calculate (void)
{
  return (((TARGET_ALTIVEC)		    ? RS6000_BTM_ALTIVEC    : 0)
	  | ((TARGET_CMPB)		    ? RS6000_BTM_CMPB	    : 0)
	  | ((TARGET_VSX)		    ? RS6000_BTM_VSX	    : 0)
	  | ((TARGET_FRE)		    ? RS6000_BTM_FRE	    : 0)
	  | ((TARGET_FRES)		    ? RS6000_BTM_FRES	    : 0)
	  | ((TARGET_FRSQRTE)		    ? RS6000_BTM_FRSQRTE    : 0)
	  | ((TARGET_FRSQRTES)		    ? RS6000_BTM_FRSQRTES   : 0)
	  | ((TARGET_POPCNTD)		    ? RS6000_BTM_POPCNTD    : 0)
	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL	    : 0)
	  | ((TARGET_P8_VECTOR)		    ? RS6000_BTM_P8_VECTOR  : 0)
	  | ((TARGET_P9_VECTOR)		    ? RS6000_BTM_P9_VECTOR  : 0)
	  | ((TARGET_P9_MISC)		    ? RS6000_BTM_P9_MISC    : 0)
	  | ((TARGET_MODULO)		    ? RS6000_BTM_MODULO	    : 0)
	  | ((TARGET_64BIT)		    ? RS6000_BTM_64BIT	    : 0)
	  | ((TARGET_POWERPC64)		    ? RS6000_BTM_POWERPC64  : 0)
	  | ((TARGET_CRYPTO)		    ? RS6000_BTM_CRYPTO	    : 0)
	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	    : 0)
	  | ((TARGET_DFP)		    ? RS6000_BTM_DFP	    : 0)
	  | ((TARGET_HARD_FLOAT)	    ? RS6000_BTM_HARD_FLOAT : 0)
	  | ((TARGET_LONG_DOUBLE_128
	      && TARGET_HARD_FLOAT
	      && !TARGET_IEEEQUAD)	    ? RS6000_BTM_LDBL128    : 0)
	  | ((TARGET_FLOAT128_TYPE)	    ? RS6000_BTM_FLOAT128   : 0)
	  | ((TARGET_FLOAT128_HW)	    ? RS6000_BTM_FLOAT128_HW : 0)
	  | ((TARGET_MMA)		    ? RS6000_BTM_MMA	    : 0)
	  | ((TARGET_POWER10)		    ? RS6000_BTM_P10	    : 0));
}
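
/* Callers test individual RS6000_BTM_* bits of the result.  A minimal
   sketch (illustrative only, not part of the build):

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     bool have_altivec_builtins = (mask & RS6000_BTM_ALTIVEC) != 0;  */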
/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.  */

static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
		      vec<const char *> &/*constraints*/,
		      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
  return NULL;
}
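
/* As a consequence of this hook, a plain asm using a carry-setting
   instruction needs no explicit clobber.  A minimal sketch
   (illustrative only, not part of the build):

     long x, y = 1;
     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   is compiled as if XER[CA] were listed in the clobbers.  */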
/* This target function is similar to the hook TARGET_OPTION_OVERRIDE
   but is called when the optimize level is changed via an attribute or
   pragma or when it is reset at the end of the code affected by the
   attribute or pragma.  It is not called at the beginning of compilation
   when TARGET_OPTION_OVERRIDE is called so if you want to perform these
   actions then, you should have TARGET_OPTION_OVERRIDE call
   TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE.  */

static void
rs6000_override_options_after_change (void)
{
  /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
     turns -frename-registers on.  */
  if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
      || (global_options_set.x_flag_unroll_all_loops
	  && flag_unroll_all_loops))
    {
      if (!global_options_set.x_unroll_only_small_loops)
	unroll_only_small_loops = 0;
      if (!global_options_set.x_flag_rename_registers)
	flag_rename_registers = 1;
      if (!global_options_set.x_flag_cunroll_grow_size)
	flag_cunroll_grow_size = 1;
    }
  else if (!global_options_set.x_flag_cunroll_grow_size)
    flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
}
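
/* For example (illustrative only), compiling with -O2 -funroll-loops
   takes the first branch above: unroll_only_small_loops is cleared, and
   flag_rename_registers and flag_cunroll_grow_size are turned on unless
   the user set them explicitly.  */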
#ifdef TARGET_USES_LINUX64_OPT
static void
rs6000_linux64_override_options ()
{
  if (!global_options_set.x_rs6000_alignment_flags)
    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
  if (rs6000_isa_flags & OPTION_MASK_64BIT)
    {
      if (DEFAULT_ABI != ABI_AIX)
	{
	  rs6000_current_abi = ABI_AIX;
	  error (INVALID_64BIT, "call");
	}
      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
      if (ELFv2_ABI_CHECK)
	{
	  rs6000_current_abi = ABI_ELFv2;
	  if (dot_symbols)
	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
	}
      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
	  error (INVALID_64BIT, "relocatable");
	}
      if (rs6000_isa_flags & OPTION_MASK_EABI)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
	  error (INVALID_64BIT, "eabi");
	}
      if (TARGET_PROTOTYPE)
	{
	  target_prototype = 0;
	  error (INVALID_64BIT, "prototype");
	}
      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
	  error ("%<-m64%> requires a PowerPC64 cpu");
	}
      if (!global_options_set.x_rs6000_current_cmodel)
	SET_CMODEL (CMODEL_MEDIUM);
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
	{
	  if (global_options_set.x_rs6000_current_cmodel
	      && rs6000_current_cmodel != CMODEL_SMALL)
	    error ("%<-mcmodel%> incompatible with other toc options");
	  if (TARGET_MINIMAL_TOC)
	    SET_CMODEL (CMODEL_SMALL);
	  else if (TARGET_PCREL
		   || (PCREL_SUPPORTED_BY_OS
		       && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
	    /* Ignore -mno-minimal-toc.  */
	    ;
	  else
	    SET_CMODEL (CMODEL_SMALL);
	}
      if (rs6000_current_cmodel != CMODEL_SMALL)
	{
	  if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
	  if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
	    TARGET_NO_SUM_IN_TOC = 0;
	}
      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
	{
	  if (global_options_set.x_rs6000_pltseq)
	    warning (0, "%qs unsupported for this ABI",
		     "-mpltseq");
	  rs6000_pltseq = false;
	}
    }
  else if (TARGET_64BIT)
    error (INVALID_32BIT, "32");
  else
    {
      if (TARGET_PROFILE_KERNEL)
	{
	  profile_kernel = 0;
	  error (INVALID_32BIT, "profile-kernel");
	}
      if (global_options_set.x_rs6000_current_cmodel)
	{
	  SET_CMODEL (CMODEL_SMALL);
	  error (INVALID_32BIT, "cmodel");
	}
    }
}
#endif
/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
   This support is only in little endian GLIBC 2.32 or newer.  */
static bool
glibc_supports_ieee_128bit (void)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
    return true;
#endif /* OPTION_GLIBC.  */

  return false;
}
/* Override command line options.

   Combine build-specific configuration information with options
   specified on the command line to set various state variables which
   influence code generation, optimization, and expansion of built-in
   functions.  Assure that command-line configuration preferences are
   compatible with each other and with the build configuration; issue
   warnings while adjusting configuration or error messages while
   rejecting configuration.

   Upon entry to this function:

     This function is called once at the beginning of
     compilation, and then again at the start and end of compiling
     each section of code that has a different configuration, as
     indicated, for example, by adding the

       __attribute__((__target__("cpu=power9")))

     qualifier to a function definition or, for example, by bracketing
     code between

       #pragma GCC target("altivec")

     and

       #pragma GCC reset_options

     directives.  Parameter global_init_p is true for the initial
     invocation, which initializes global variables, and false for all
     subsequent invocations.

     Various global state information is assumed to be valid.  This
     includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
     default CPU specified at build configure time, TARGET_DEFAULT,
     representing the default set of option flags for the default
     target, and global_options_set.x_rs6000_isa_flags, representing
     which options were requested on the command line.

   Upon return from this function:

     rs6000_isa_flags_explicit has a non-zero bit for each flag that
     was set by name on the command line.  Additionally, if certain
     attributes are automatically enabled or disabled by this function
     in order to assure compatibility between options and
     configuration, the flags associated with those attributes are
     also set.  By setting these "explicit bits", we avoid the risk
     that other code might accidentally overwrite these particular
     attributes with "default values".

     The various bits of rs6000_isa_flags are set to indicate the
     target options that have been selected for the most current
     compilation efforts.  This has the effect of also turning on the
     associated TARGET_XXX values since these are macros which are
     generally defined to test the corresponding bit of the
     rs6000_isa_flags variable.

     The variable rs6000_builtin_mask is set to represent the target
     options for the most current compilation efforts, consistent with
     the current contents of rs6000_isa_flags.  This variable controls
     expansion of built-in functions.

     Various other global variables and fields of global structures
     (over 50 in all) are initialized to reflect the desired options
     for the most current compilation efforts.  */
3622 rs6000_option_override_internal (bool global_init_p
)
3626 HOST_WIDE_INT set_masks
;
3627 HOST_WIDE_INT ignore_masks
;
3630 struct cl_target_option
*main_target_opt
3631 = ((global_init_p
|| target_option_default_node
== NULL
)
3632 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3634 /* Print defaults. */
3635 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3636 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3638 /* Remember the explicit arguments. */
3640 rs6000_isa_flags_explicit
= global_options_set
.x_rs6000_isa_flags
;
  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it.  The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (global_options_set.x_rs6000_alignment_flags
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "%qs is not supported for 64-bit Darwin;"
	     " it is incompatible with the installed C and C++ libraries",
	     "-malign-power");
  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !global_options_set.x_flag_ira_loop_pressure)
    flag_ira_loop_pressure = 1;

  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete but not if any -fasynchronous-unwind-tables
     options were already specified.  */
  if (flag_sanitize & SANITIZE_USER_ADDRESS
      && !global_options_set.x_flag_asynchronous_unwind_tables)
    flag_asynchronous_unwind_tables = 1;

  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
     loop unroller is active.  It is only checked during unrolling, so
     we can just set it on by default.  */
  if (!global_options_set.x_flag_variable_expansion_in_unroller)
    flag_variable_expansion_in_unroller = 1;
  /* Set the pointer size.  */
  if (TARGET_64BIT)
    {
      rs6000_pmode = DImode;
      rs6000_pointer_size = 64;
    }
  else
    {
      rs6000_pmode = SImode;
      rs6000_pointer_size = 32;
    }

  /* Some OSs don't support saving the high part of 64-bit registers on context
     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
     if the user wants either, the user must explicitly specify them and we
     won't interfere with the user's specification.  */

  set_masks = POWERPC_MASKS;
#ifdef OS_MISSING_POWERPC64
  if (OS_MISSING_POWERPC64)
    set_masks &= ~OPTION_MASK_POWERPC64;
#endif
#ifdef OS_MISSING_ALTIVEC
  if (OS_MISSING_ALTIVEC)
    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
		   | OTHER_VSX_VECTOR_MASKS);
#endif

  /* Don't override by the processor default if given explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
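  /* Worked example with hypothetical bit values: if POWERPC_MASKS were
     0b1111, OS_MISSING_ALTIVEC cleared the vector bits 0b0110, and the
     user explicitly set an option occupying bit 0b0001, then

       set_masks = (0b1111 & ~0b0110) & ~0b0001 = 0b1000

     and only that remaining bit may be rewritten from the
     processor_target_table defaults below.  */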
  /* Process the -mcpu=<xxx> and -mtune=<xxx> argument.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    cpu_index = rs6000_cpu_index;
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    cpu_index = main_target_opt->x_rs6000_cpu_index;
  else if (OPTION_TARGET_CPU_DEFAULT)
    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index;
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
			   & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we
	 switched to using rs6000_isa_flags, we need to do the initialization
	 here.

	 If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
	 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags;
      if (TARGET_DEFAULT)
	flags = TARGET_DEFAULT;
      else
	{
	  /* PowerPC 64-bit LE requires at least ISA 2.07.  */
	  const char *default_cpu = (!TARGET_POWERPC64
				     ? "powerpc"
				     : (BYTES_BIG_ENDIAN
					? "powerpc64"
					: "powerpc64le"));
	  int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
	  flags = processor_target_table[default_cpu_index].target_enable;
	}
      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }
  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (cpu_index >= 0)
    rs6000_tune_index = tune_index = cpu_index;
  else
    {
      size_t i;
      enum processor_type tune_proc
	= (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);

      tune_index = -1;
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (processor_target_table[i].processor == tune_proc)
	  {
	    tune_index = (int) i;
	    break;
	  }
    }

  if (cpu_index >= 0)
    rs6000_cpu = processor_target_table[cpu_index].processor;
  else
    rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;

  gcc_assert (tune_index >= 0);
  rs6000_tune = processor_target_table[tune_index].processor;
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
      || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
      || rs6000_cpu == PROCESSOR_PPCE5500)
    {
      if (TARGET_ALTIVEC)
	error ("AltiVec not supported in this target");
    }
  /* If we are optimizing big endian systems for space, use the load/store
     multiple instructions.  */
  if (BYTES_BIG_ENDIAN && optimize_size)
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;

  /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
     because the hardware doesn't support the instructions used in little
     endian mode, and causes an alignment trap.  The 750 does not cause an
     alignment trap (except when the target is unaligned).  */

  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
    {
      rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
	warning (0, "%qs is not supported on little endian systems",
		 "-mmultiple");
    }
  /* If little-endian, default to -mstrict-align on older processors.
     Testing for direct_move matches power8 and later.  */
  if (!BYTES_BIG_ENDIAN
      && !(processor_target_table[tune_index].target_enable
	   & OPTION_MASK_DIRECT_MOVE))
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;

  if (!rs6000_fold_gimple)
    fprintf (stderr,
	     "gimple folding of rs6000 builtins has been disabled.\n");
  /* Add some warnings for VSX.  */
  if (TARGET_VSX)
    {
      const char *msg = NULL;
      if (!TARGET_HARD_FLOAT)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    msg = N_("%<-mvsx%> requires hardware floating point");
	  else
	    {
	      rs6000_isa_flags &= ~OPTION_MASK_VSX;
	      rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	    }
	}
      else if (TARGET_AVOID_XFORM > 0)
	msg = N_("%<-mvsx%> needs indexed addressing");
      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
				   & OPTION_MASK_ALTIVEC))
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
	  else
	    msg = N_("%<-mno-altivec%> disables vsx");
	}

      if (msg)
	{
	  warning (0, msg);
	  rs6000_isa_flags &= ~OPTION_MASK_VSX;
	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	}
    }
  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
     the -mcpu setting to enable options that conflict.  */
  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
				       | OPTION_MASK_ALTIVEC
				       | OPTION_MASK_VSX)) != 0)
    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
			   | OPTION_MASK_DIRECT_MOVE)
			  & ~rs6000_isa_flags_explicit);

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
#ifdef XCOFF_DEBUGGING_INFO
  /* For AIX default to 64-bit DWARF.  */
  if (!global_options_set.x_dwarf_offset_size)
    dwarf_offset_size = POINTER_SIZE_UNITS;
#endif
  /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
     off all of the options that depend on those flags.  */
  ignore_masks = rs6000_disable_incompatible_switches ();

  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
     unless the user explicitly used the -mno-<option> to disable the code.  */
  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
    rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_P9_MINMAX)
    {
      if (cpu_index >= 0)
	{
	  if (cpu_index == PROCESSOR_POWER9)
	    {
	      /* legacy behavior: allow -mcpu=power9 with certain
		 capabilities explicitly disabled.  */
	      rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
	    }
	  else
	    error ("power9 target option is incompatible with %<%s=<xxx>%> "
		   "for <xxx> less than power9", "-mcpu");
	}
      else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
	       != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
		   & rs6000_isa_flags_explicit))
	{
	  /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
	     were explicitly cleared.  */
	  error ("%qs incompatible with explicitly disabled options",
		 "-mpower9-minmax");
	}
      else
	rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
    }
  else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_VSX)
    rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_POPCNTD)
    rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_DFP)
    rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_CMPB)
    rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_FPRND)
    rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
  else if (TARGET_POPCNTB)
    rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
  else if (TARGET_ALTIVEC)
    rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
	error ("%qs requires %qs", "-mcrypto", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
    }

  if (!TARGET_FPRND && TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
	/* TARGET_VSX = 1 implies Power 7 and newer.  */
	error ("%qs requires %qs", "-mvsx", "-mfprnd");
      rs6000_isa_flags &= ~OPTION_MASK_FPRND;
    }

  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
	error ("%qs requires %qs", "-mdirect-move", "-mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
    }

  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
    }

  if (TARGET_P8_VECTOR && !TARGET_VSX)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
	error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
	}
      else
	{
	  /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
	     not explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_VSX;
	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	}
    }

  if (TARGET_DFP && !TARGET_HARD_FLOAT)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
	error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
      rs6000_isa_flags &= ~OPTION_MASK_DFP;
    }
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
	warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));

      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
	warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));

      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
			    | OPTION_MASK_QUAD_MEMORY_ATOMIC);
    }

  /* Non-atomic quad memory load/store are disabled for little endian, since
     the words are reversed, but atomic operations can still be done by
     swapping the words.  */
  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
	warning (0, N_("%<-mquad-memory%> is not available in little endian "
		       "mode"));

      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
    }

  /* Assume if the user asked for normal quad memory instructions, they want
     the atomic versions as well, unless they explicitly told us not to use
     quad word atomic instructions.  */
  if (TARGET_QUAD_MEMORY
      && !TARGET_QUAD_MEMORY_ATOMIC
      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
  /* If we can shrink-wrap the TOC register save separately, then use
     -msave-toc-indirect unless explicitly disabled.  */
  if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
      && flag_shrink_wrap_separate
      && optimize_function_for_speed_p (cfun))
    rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;

  /* Enable power8 fusion if we are tuning for power8, even if we aren't
     generating power8 instructions.  Power9 does not optimize power8 fusion
     cases.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
    {
      if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
      else
	rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
    }

  /* Setting additional fusion flags turns on base fusion.  */
  if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
	{
	  if (TARGET_P8_FUSION_SIGN)
	    error ("%qs requires %qs", "-mpower8-fusion-sign",
		   "-mpower8-fusion");

	  rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
	}
      else
	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
    }

  /* Power8 does not fuse sign extended loads with the addis.  If we are
     optimizing at high levels for speed, convert a sign extended load into a
     zero extending load, and an explicit sign extension.  */
  if (TARGET_P8_FUSION
      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
      && optimize_function_for_speed_p (cfun)
      && optimize >= 3)
    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
  /* ISA 3.0 vector instructions include ISA 2.07.  */
  if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
    {
      /* We prefer to not mention undocumented options in
	 error messages.  However, if users have managed to select
	 power9-vector without selecting power8-vector, they
	 already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
	  && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
	error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
	  if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	    rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
	}
      else
	{
	  /* OPTION_MASK_P9_VECTOR is explicit and
	     OPTION_MASK_P8_VECTOR is not explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
	  rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
	}
    }
  /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
     support.  If we only have ISA 2.06 support, and the user did not specify
     the switch, leave it set to -1 so the movmisalign patterns are enabled,
     but we don't enable the full vectorization support.  */
  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
    TARGET_ALLOW_MOVMISALIGN = 1;

  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
    {
      if (TARGET_ALLOW_MOVMISALIGN > 0
	  && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
	error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");

      TARGET_ALLOW_MOVMISALIGN = 0;
    }
  /* Determine when unaligned vector accesses are permitted, and when
     they are preferred over masked Altivec loads.  Note that if
     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
     not true.  */
  if (TARGET_EFFICIENT_UNALIGNED_VSX)
    {
      if (!TARGET_VSX)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");

	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
	}

      else if (!TARGET_ALLOW_MOVMISALIGN)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("%qs requires %qs", "-munefficient-unaligned-vsx",
		   "-mallow-movmisalign");

	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
	}
    }

  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
      else
	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
    }

  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
    {
      if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
      else
	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
    }
  /* Use long double size to select the appropriate long double.  We use
     TYPE_PRECISION to differentiate the 3 different long double types.  We map
     128 into the precision used for TFmode.  */
  int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
				  ? 64
				  : FLOAT_PRECISION_TFmode);

  /* Set long double size before the IEEE 128-bit tests.  */
  if (!global_options_set.x_rs6000_long_double_type_size)
    {
      if (main_target_opt != NULL
	  && (main_target_opt->x_rs6000_long_double_type_size
	      != default_long_double_size))
	error ("target attribute or pragma changes %<long double%> size");
      else
	rs6000_long_double_type_size = default_long_double_size;
    }
  else if (rs6000_long_double_type_size == 128)
    rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
  else if (global_options_set.x_rs6000_ieeequad)
    {
      if (global_options.x_rs6000_ieeequad)
	error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
      else
	error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
    }
  /* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
     systems will also set long double to be IEEE 128-bit.  AIX and Darwin
     explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
     those systems will not pick up this default.  Warn if the user changes the
     default unless -Wno-psabi.  */
  if (!global_options_set.x_rs6000_ieeequad)
    rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;

  else
    {
      if (global_options.x_rs6000_ieeequad
	  && (!TARGET_POPCNTD || !TARGET_VSX))
	error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");

      if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
	{
	  /* Determine if the user can change the default long double type at
	     compilation time.  Only C and C++ support this, and you need GLIBC
	     2.32 or newer.  Only issue one warning.  */
	  static bool warned_change_long_double;

	  if (!warned_change_long_double
	      && (!glibc_supports_ieee_128bit ()
		  || (!lang_GNU_C () && !lang_GNU_CXX ())))
	    {
	      warned_change_long_double = true;
	      if (TARGET_IEEEQUAD)
		warning (OPT_Wpsabi, "Using IEEE extended precision "
			 "%<long double%>");
	      else
		warning (OPT_Wpsabi, "Using IBM extended precision "
			 "%<long double%>");
	    }
	}
    }
  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
     infrastructure (-mfloat128-type) but not enable the actual __float128 type
     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
     the keyword as well as the type.  */
  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;

  /* IEEE 128-bit floating point requires VSX support.  */
  if (TARGET_FLOAT128_KEYWORD)
    {
      if (!TARGET_VSX)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
	    error ("%qs requires VSX support", "-mfloat128");

	  TARGET_FLOAT128_TYPE = 0;
	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
				| OPTION_MASK_FLOAT128_HW);
	}
      else if (!TARGET_FLOAT128_TYPE)
	{
	  TARGET_FLOAT128_TYPE = 1;
	  warning (0, "The %<-mfloat128%> option may not be fully supported");
	}
    }

  /* Enable the __float128 keyword under Linux by default.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;

  /* If we are supporting the float128 type and full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;

  if (TARGET_FLOAT128_HW
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }

  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }
  /* Enable -mprefixed by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
    rs6000_isa_flags |= OPTION_MASK_PREFIXED;

  /* -mprefixed requires -mcpu=power10 (or later).  */
  else if (TARGET_PREFIXED && !TARGET_POWER10)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
	error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
    }

  /* -mpcrel requires prefixed load/store addressing.  */
  if (TARGET_PCREL && !TARGET_PREFIXED)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
	error ("%qs requires %qs", "-mpcrel", "-mprefixed");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }
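  /* Net effect of the two blocks above (illustrative): a plain
     "-mcpu=power10" turns on OPTION_MASK_PREFIXED by default, while
     "-mcpu=power9 -mprefixed" is rejected, since prefixed instructions only
     exist from power10 onward; -mpcrel in turn requires the prefixed
     support enabled here.  */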
  /* Print the options after updating the defaults.  */
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);

  /* E500mc does "better" if we inline more aggressively.  Respect the
     user's opinion, though.  */
  if (rs6000_block_move_inline_limit == 0
      && (rs6000_tune == PROCESSOR_PPCE500MC
	  || rs6000_tune == PROCESSOR_PPCE500MC64
	  || rs6000_tune == PROCESSOR_PPCE5500
	  || rs6000_tune == PROCESSOR_PPCE6500))
    rs6000_block_move_inline_limit = 128;

  /* store_one_arg depends on expand_block_move to handle at least the
     size of reg_parm_stack_space.  */
  if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
    rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
  if (global_init_p)
    {
      /* If the appropriate debug option is enabled, replace the target hooks
	 with debug versions that call the real version and then print
	 debugging information.  */
      if (TARGET_DEBUG_COST)
	{
	  targetm.rtx_costs = rs6000_debug_rtx_costs;
	  targetm.address_cost = rs6000_debug_address_cost;
	  targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
	}

      if (TARGET_DEBUG_ADDR)
	{
	  targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
	  targetm.legitimize_address = rs6000_debug_legitimize_address;
	  rs6000_secondary_reload_class_ptr
	    = rs6000_debug_secondary_reload_class;
	  targetm.secondary_memory_needed
	    = rs6000_debug_secondary_memory_needed;
	  targetm.can_change_mode_class
	    = rs6000_debug_can_change_mode_class;
	  rs6000_preferred_reload_class_ptr
	    = rs6000_debug_preferred_reload_class;
	  rs6000_mode_dependent_address_ptr
	    = rs6000_debug_mode_dependent_address;
	}

      if (rs6000_veclibabi_name)
	{
	  if (strcmp (rs6000_veclibabi_name, "mass") == 0)
	    rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
	  else
	    {
	      error ("unknown vectorization library ABI type (%qs) for "
		     "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
	      ret = false;
	    }
	}
    }
  /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
     target attribute or pragma which automatically enables both options,
     unless the altivec ABI was set.  This is set by default for 64-bit, but
     not for 32-bit.  */
  if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
    {
      TARGET_FLOAT128_TYPE = 0;
      rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
			     | OPTION_MASK_FLOAT128_KEYWORD)
			    & ~rs6000_isa_flags_explicit);
    }
  /* Enable Altivec ABI for AIX -maltivec.  */
  if (TARGET_XCOFF
      && (TARGET_ALTIVEC || TARGET_VSX)
      && !global_options_set.x_rs6000_altivec_abi)
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
	error ("target attribute or pragma changes AltiVec ABI");
      else
	rs6000_altivec_abi = 1;
    }
  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
     be explicitly overridden in either case.  */
  if (TARGET_ELF)
    {
      if (!global_options_set.x_rs6000_altivec_abi
	  && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
	{
	  if (main_target_opt != NULL &&
	      !main_target_opt->x_rs6000_altivec_abi)
	    error ("target attribute or pragma changes AltiVec ABI");
	  else
	    rs6000_altivec_abi = 1;
	}
    }
  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also MACH-O.  */
  if (TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
	error ("target attribute or pragma changes darwin64 ABI");
      else
	{
	  rs6000_darwin64_abi = 1;
	  /* Default to natural alignment, for better performance.  */
	  rs6000_alignment_flags = MASK_ALIGN_NATURAL;
	}
    }

  /* Place FP constants in the constant pool instead of TOC
     if section anchors enabled.  */
  if (flag_section_anchors
      && !global_options_set.x_TARGET_NO_FP_IN_TOC)
    TARGET_NO_FP_IN_TOC = 1;
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUB3TARGET_OVERRIDE_OPTIONS
  SUB3TARGET_OVERRIDE_OPTIONS;
#endif
  /* If the ABI has support for PC-relative relocations, enable it by default.
     This test depends on the sub-target tests above setting the code model to
     medium for ELF v2 systems.  */
  if (PCREL_SUPPORTED_BY_OS
      && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
    rs6000_isa_flags |= OPTION_MASK_PCREL;

  /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
     after the subtarget override options are done.  */
  else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
	error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }
  /* Enable -mmma by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
    rs6000_isa_flags |= OPTION_MASK_MMA;

  /* Turn off vector pair/mma options on non-power10 systems.  */
  else if (!TARGET_POWER10 && TARGET_MMA)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
	error ("%qs requires %qs", "-mmma", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_MMA;
    }

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
  rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
			&& rs6000_tune != PROCESSOR_POWER5
			&& rs6000_tune != PROCESSOR_POWER6
			&& rs6000_tune != PROCESSOR_POWER7
			&& rs6000_tune != PROCESSOR_POWER8
			&& rs6000_tune != PROCESSOR_POWER9
			&& rs6000_tune != PROCESSOR_POWER10
			&& rs6000_tune != PROCESSOR_PPCA2
			&& rs6000_tune != PROCESSOR_CELL
			&& rs6000_tune != PROCESSOR_PPC476);
  rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
			 || rs6000_tune == PROCESSOR_POWER5
			 || rs6000_tune == PROCESSOR_POWER7
			 || rs6000_tune == PROCESSOR_POWER8);
  rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
				 || rs6000_tune == PROCESSOR_POWER5
				 || rs6000_tune == PROCESSOR_POWER6
				 || rs6000_tune == PROCESSOR_POWER7
				 || rs6000_tune == PROCESSOR_POWER8
				 || rs6000_tune == PROCESSOR_POWER9
				 || rs6000_tune == PROCESSOR_POWER10
				 || rs6000_tune == PROCESSOR_PPCE500MC
				 || rs6000_tune == PROCESSOR_PPCE500MC64
				 || rs6000_tune == PROCESSOR_PPCE5500
				 || rs6000_tune == PROCESSOR_PPCE6500);
  /* Allow debug switches to override the above settings.  These are set to -1
     in rs6000.opt to indicate the user hasn't directly set the switch.  */
  if (TARGET_ALWAYS_HINT >= 0)
    rs6000_always_hint = TARGET_ALWAYS_HINT;

  if (TARGET_SCHED_GROUPS >= 0)
    rs6000_sched_groups = TARGET_SCHED_GROUPS;

  if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
    rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;

  rs6000_sched_restricted_insns_priority
    = (rs6000_sched_groups ? 1 : 0);

  /* Handle -msched-costly-dep option.  */
  rs6000_sched_costly_dep
    = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
  if (rs6000_sched_costly_dep_str)
    {
      if (! strcmp (rs6000_sched_costly_dep_str, "no"))
	rs6000_sched_costly_dep = no_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
	rs6000_sched_costly_dep = all_deps_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
	rs6000_sched_costly_dep = true_store_to_load_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
	rs6000_sched_costly_dep = store_to_load_dep_costly;
      else
	rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
				   atoi (rs6000_sched_costly_dep_str));
    }
  /* Handle -minsert-sched-nops option.  */
  rs6000_sched_insert_nops
    = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);

  if (rs6000_sched_insert_nops_str)
    {
      if (! strcmp (rs6000_sched_insert_nops_str, "no"))
	rs6000_sched_insert_nops = sched_finish_none;
      else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
	rs6000_sched_insert_nops = sched_finish_pad_groups;
      else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
	rs6000_sched_insert_nops = sched_finish_regroup_exact;
      else
	rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
				    atoi (rs6000_sched_insert_nops_str));
    }
  /* Handle stack protector.  */
  if (!global_options_set.x_rs6000_stack_protector_guard)
#ifdef TARGET_THREAD_SSP_OFFSET
    rs6000_stack_protector_guard = SSP_TLS;
#else
    rs6000_stack_protector_guard = SSP_GLOBAL;
#endif

#ifdef TARGET_THREAD_SSP_OFFSET
  rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
  rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
#endif

  if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
    {
      char *endp;
      const char *str = rs6000_stack_protector_guard_offset_str;

      errno = 0;
      long offset = strtol (str, &endp, 0);
      if (!*str || *endp || errno)
	error ("%qs is not a valid number in %qs", str,
	       "-mstack-protector-guard-offset=");

      if (!IN_RANGE (offset, -0x8000, 0x7fff)
	  || (TARGET_64BIT && (offset & 3)))
	error ("%qs is not a valid offset in %qs", str,
	       "-mstack-protector-guard-offset=");

      rs6000_stack_protector_guard_offset = offset;
    }

  if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
    {
      const char *str = rs6000_stack_protector_guard_reg_str;
      int reg = decode_reg_name (str);

      if (!IN_RANGE (reg, 1, 31))
	error ("%qs is not a valid base register in %qs", str,
	       "-mstack-protector-guard-reg=");

      rs6000_stack_protector_guard_reg = reg;
    }

  if (rs6000_stack_protector_guard == SSP_TLS
      && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
    error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
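  /* Example usage (illustrative command line): on a glibc target that
     defines TARGET_THREAD_SSP_OFFSET, the effective default corresponds to

       -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
       -mstack-protector-guard-offset=<TARGET_THREAD_SSP_OFFSET>

     (r2 instead of r13 for 32-bit).  A user-supplied offset must fit in a
     signed 16-bit displacement and, under -m64, be a multiple of 4,
     matching the checks above.  */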
#ifdef TARGET_REGNAMES
  /* If the user desires alternate register names, copy in the
     alternate names now.  */
  if (TARGET_REGNAMES)
    memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
#endif

  /* Set aix_struct_return last, after the ABI is determined.
     If -maix-struct-return or -msvr4-struct-return was explicitly
     used, don't override with the ABI default.  */
  if (!global_options_set.x_aix_struct_return)
    aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);

  /* IBM XL compiler defaults to unsigned bitfields.  */
  if (TARGET_XL_COMPAT)
    flag_signed_bitfields = 0;
  if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
    REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;

  ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);

  /* We can only guarantee the availability of DI pseudo-ops when
     assembling for 64-bit targets.  */
  if (!TARGET_64BIT)
    {
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  /* Set branch target alignment, if not optimizing for size.  */
  if (!optimize_size)
    {
      /* Cell wants to be aligned 8byte for dual issue.  Titan wants to be
	 aligned 8byte to avoid misprediction by the branch predictor.  */
      if (rs6000_tune == PROCESSOR_TITAN
	  || rs6000_tune == PROCESSOR_CELL)
	{
	  if (flag_align_functions && !str_align_functions)
	    str_align_functions = "8";
	  if (flag_align_jumps && !str_align_jumps)
	    str_align_jumps = "8";
	  if (flag_align_loops && !str_align_loops)
	    str_align_loops = "8";
	}
      if (rs6000_align_branch_targets)
	{
	  if (flag_align_functions && !str_align_functions)
	    str_align_functions = "16";
	  if (flag_align_jumps && !str_align_jumps)
	    str_align_jumps = "16";
	  if (flag_align_loops && !str_align_loops)
	    {
	      can_override_loop_align = 1;
	      str_align_loops = "16";
	    }
	}
    }
  /* Arrange to save and restore machine status around nested functions.  */
  init_machine_status = rs6000_init_machine_status;

  /* We should always be splitting complex arguments, but we can't break
     Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
  if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
    targetm.calls.split_complex_arg = NULL;

  /* The AIX and ELFv1 ABIs define standard function descriptors.  */
  if (DEFAULT_ABI == ABI_AIX)
    targetm.calls.custom_function_descriptors = 0;
  /* Initialize rs6000_cost with the appropriate target costs.  */
  if (optimize_size)
    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
  else
    switch (rs6000_tune)
      {
      case PROCESSOR_RS64A:
	rs6000_cost = &rs64a_cost;
	break;

      case PROCESSOR_MPCCORE:
	rs6000_cost = &mpccore_cost;
	break;

      case PROCESSOR_PPC403:
	rs6000_cost = &ppc403_cost;
	break;

      case PROCESSOR_PPC405:
	rs6000_cost = &ppc405_cost;
	break;

      case PROCESSOR_PPC440:
	rs6000_cost = &ppc440_cost;
	break;

      case PROCESSOR_PPC476:
	rs6000_cost = &ppc476_cost;
	break;

      case PROCESSOR_PPC601:
	rs6000_cost = &ppc601_cost;
	break;

      case PROCESSOR_PPC603:
	rs6000_cost = &ppc603_cost;
	break;

      case PROCESSOR_PPC604:
	rs6000_cost = &ppc604_cost;
	break;

      case PROCESSOR_PPC604e:
	rs6000_cost = &ppc604e_cost;
	break;

      case PROCESSOR_PPC620:
	rs6000_cost = &ppc620_cost;
	break;

      case PROCESSOR_PPC630:
	rs6000_cost = &ppc630_cost;
	break;

      case PROCESSOR_CELL:
	rs6000_cost = &ppccell_cost;
	break;

      case PROCESSOR_PPC750:
      case PROCESSOR_PPC7400:
	rs6000_cost = &ppc750_cost;
	break;

      case PROCESSOR_PPC7450:
	rs6000_cost = &ppc7450_cost;
	break;

      case PROCESSOR_PPC8540:
      case PROCESSOR_PPC8548:
	rs6000_cost = &ppc8540_cost;
	break;

      case PROCESSOR_PPCE300C2:
      case PROCESSOR_PPCE300C3:
	rs6000_cost = &ppce300c2c3_cost;
	break;

      case PROCESSOR_PPCE500MC:
	rs6000_cost = &ppce500mc_cost;
	break;

      case PROCESSOR_PPCE500MC64:
	rs6000_cost = &ppce500mc64_cost;
	break;

      case PROCESSOR_PPCE5500:
	rs6000_cost = &ppce5500_cost;
	break;

      case PROCESSOR_PPCE6500:
	rs6000_cost = &ppce6500_cost;
	break;

      case PROCESSOR_TITAN:
	rs6000_cost = &titan_cost;
	break;

      case PROCESSOR_POWER4:
      case PROCESSOR_POWER5:
	rs6000_cost = &power4_cost;
	break;

      case PROCESSOR_POWER6:
	rs6000_cost = &power6_cost;
	break;

      case PROCESSOR_POWER7:
	rs6000_cost = &power7_cost;
	break;

      case PROCESSOR_POWER8:
	rs6000_cost = &power8_cost;
	break;

      case PROCESSOR_POWER9:
      case PROCESSOR_POWER10:
	rs6000_cost = &power9_cost;
	break;

      case PROCESSOR_PPCA2:
	rs6000_cost = &ppca2_cost;
	break;

      default:
	gcc_unreachable ();
      }
  if (global_init_p)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_simultaneous_prefetches,
			   rs6000_cost->simultaneous_prefetches);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_l1_cache_size,
			   rs6000_cost->l1_cache_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_l1_cache_line_size,
			   rs6000_cost->cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_l2_cache_size,
			   rs6000_cost->l2_cache_size);

      /* Increase loop peeling limits based on performance analysis.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_max_peeled_insns, 400);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_max_completely_peeled_insns, 400);

      /* The lxvl/stxvl instructions don't perform well before Power10.  */
      if (TARGET_POWER10)
	SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			     param_vect_partial_vector_usage, 1);
      else
	SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			     param_vect_partial_vector_usage, 0);

      /* Use the 'model' -fsched-pressure algorithm by default.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_sched_pressure_algorithm,
			   SCHED_PRESSURE_MODEL);

      /* If using typedef char *va_list, signal that
	 __builtin_va_start (&ap, 0) can be optimized to
	 ap = __builtin_next_arg (0).  */
      if (DEFAULT_ABI != ABI_V4)
	targetm.expand_builtin_va_start = NULL;
    }
  rs6000_override_options_after_change ();

  /* If not explicitly specified via option, decide whether to generate indexed
     load/store instructions.  A value of -1 indicates that the
     initial value of this variable has not been overwritten.  During
     compilation, TARGET_AVOID_XFORM is either 0 or 1.  */
  if (TARGET_AVOID_XFORM == -1)
    /* Avoid indexed addressing when targeting Power6 in order to avoid the
       DERAT mispredict penalty.  However the LVE and STVE altivec instructions
       need indexed accesses and the type used is the scalar type of the element
       being loaded or stored.  */
    TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
			  && !TARGET_ALTIVEC);
  /* Set the -mrecip options.  */
  if (rs6000_recip_name)
    {
      char *p = ASTRDUP (rs6000_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = ((TARGET_RECIP_PRECISION)
		    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for %<%s=%s%>", "-mrecip", q);
		  invert = false;
		  mask = 0;
		  ret = false;
		}
	    }

	  if (invert)
	    rs6000_recip_control &= ~mask;
	  else
	    rs6000_recip_control |= mask;
	}
    }
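  /* Example (illustrative): "-mrecip=rsqrtf,!divd" runs the loop above
     twice; the first token ORs the mask for single-precision reciprocal
     square root estimates into rs6000_recip_control, while the leading '!'
     on the second token clears the double-precision divide mask instead.  */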
  /* Set the builtin mask of the various options used that could affect which
     builtins were used.  In the past we used target_flags, but we've run out
     of bits, and some options are no longer in target_flags.  */
  rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
    rs6000_print_builtin_options (stderr, 0, "builtin mask",
				  rs6000_builtin_mask);

  /* Initialize all of the registers.  */
  rs6000_init_hard_regno_mode_ok (global_init_p);

  /* Save the initial options in case the user does function specific
     options.  */
  if (global_init_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node (&global_options, &global_options_set);

  /* If not explicitly specified via option, decide whether to generate the
     extra blr's required to preserve the link stack on some cpus (eg, 476).  */
  if (TARGET_LINK_STACK == -1)
    SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);

  /* Deprecate use of -mno-speculate-indirect-jumps.  */
  if (!rs6000_speculate_indirect_jumps)
    warning (0, "%qs is deprecated and not recommended in any circumstances",
	     "-mno-speculate-indirect-jumps");

  return ret;
}
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  (void) rs6000_option_override_internal (true);
}
/* Implement targetm.vectorize.builtin_mask_for_load.  */

static tree
rs6000_builtin_mask_for_load (void)
{
  /* Don't use lvsl/vperm for P8 and similarly efficient machines.  */
  if ((TARGET_ALTIVEC && !TARGET_VSX)
      || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
    return altivec_builtin_mask_for_load;
  else
    return 0;
}
/* Implement LOOP_ALIGN.  */

align_flags
rs6000_loop_align (rtx label)
{
  basic_block bb;
  int ninsns;

  /* Don't override loop alignment if -falign-loops was specified.  */
  if (!can_override_loop_align)
    return align_loops;

  bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);

  /* Align small loops to 32 bytes to fit in an icache sector, otherwise
     return the default.  */
  if (ninsns > 4 && ninsns <= 8
      && (rs6000_tune == PROCESSOR_POWER4
	  || rs6000_tune == PROCESSOR_POWER5
	  || rs6000_tune == PROCESSOR_POWER6
	  || rs6000_tune == PROCESSOR_POWER7
	  || rs6000_tune == PROCESSOR_POWER8))
    return align_flags (5);
  else
    return align_loops;
}
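/* Note on the value returned above: align_flags (5) requests 2**5 == 32
   byte alignment, so a loop of 5..8 four-byte instructions (20..32 bytes)
   fits entirely within one 32-byte icache sector on the listed tunings.  */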
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */

static bool
rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
    return true;

  if (rs6000_alignment_flags == MASK_ALIGN_POWER)
    return false;

  /* Assuming that all other types are naturally aligned.  CHECKME!  */
  return true;
}
/* Return true if the vector misalignment factor is supported by the
   target.  */

static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
					    const_tree type,
					    int misalignment,
					    bool is_packed)
{
  if (TARGET_VSX)
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return true;

      /* Return if movmisalign pattern is not supported for this mode.  */
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;

      if (misalignment == -1)
	{
	  /* Misalignment factor is unknown at compile time but we know
	     it's word aligned.  */
	  if (rs6000_vector_alignment_reachable (type, is_packed))
	    {
	      int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));

	      if (element_size == 64 || element_size == 32)
		return true;
	    }

	  return false;
	}

      /* VSX supports word-aligned vector.  */
      if (misalignment % 4 == 0)
	return true;
    }
  return false;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				   tree vectype, int misalign)
{
  unsigned elements;
  tree elem_type;

  switch (type_of_cost)
    {
      case scalar_stmt:
      case scalar_store:
      case vector_stmt:
      case vector_store:
      case vec_to_scalar:
      case scalar_to_vec:
      case cond_branch_not_taken:
	return 1;

      case scalar_load:
      case vector_load:
	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
	return 2;

      case vec_perm:
	/* Power7 has only one permute unit, make it a bit expensive.  */
	if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
	  return 3;
	else
	  return 1;

      case vec_promote_demote:
	/* Power7 has only one permute/pack unit, make it a bit expensive.  */
	if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
	  return 4;
	else
	  return 1;

      case cond_branch_taken:
	return 3;

      case unaligned_load:
      case vector_gather_load:
	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
	if (TARGET_EFFICIENT_UNALIGNED_VSX)
	  return 2;

	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	  {
	    elements = TYPE_VECTOR_SUBPARTS (vectype);
	    if (elements == 2)
	      /* Double word aligned.  */
	      return 4;

	    if (elements == 4)
	      {
		switch (misalign)
		  {
		  case 8:
		    /* Double word aligned.  */
		    return 4;

		  case -1:
		    /* Unknown misalignment.  */
		  case 4:
		  case 12:
		    /* Word aligned.  */
		    return 33;

		  default:
		    gcc_unreachable ();
		  }
	      }
	  }

	if (TARGET_ALTIVEC)
	  /* Misaligned loads are not supported.  */
	  gcc_unreachable ();

	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
	return 4;

      case unaligned_store:
      case vector_scatter_store:
	if (TARGET_EFFICIENT_UNALIGNED_VSX)
	  return 1;

	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	  {
	    elements = TYPE_VECTOR_SUBPARTS (vectype);
	    if (elements == 2)
	      /* Double word aligned.  */
	      return 2;

	    if (elements == 4)
	      {
		switch (misalign)
		  {
		  case 8:
		    /* Double word aligned.  */
		    return 2;

		  case -1:
		    /* Unknown misalignment.  */
		  case 4:
		  case 12:
		    /* Word aligned.  */
		    return 23;

		  default:
		    gcc_unreachable ();
		  }
	      }
	  }

	if (TARGET_ALTIVEC)
	  /* Misaligned stores are not supported.  */
	  gcc_unreachable ();

	return 2;

      case vec_construct:
	/* This is a rough approximation assuming non-constant elements
	   constructed into a vector via element insertion.  FIXME:
	   vec_construct is not granular enough for uniformly good
	   decisions.  If the initialization is a splat, this is
	   cheaper than we estimate.  Improve this someday.  */
	elem_type = TREE_TYPE (vectype);
	/* 32-bit vectors loaded into registers are stored as double
	   precision, so we need 2 permutes, 2 converts, and 1 merge
	   to construct a vector of short floats from them.  */
	if (SCALAR_FLOAT_TYPE_P (elem_type)
	    && TYPE_PRECISION (elem_type) == 32)
	  return 5;
	/* On POWER9, integer vector types are built up in GPRs and then
	   use a direct move (2 cycles).  For POWER8 this is even worse,
	   as we need two direct moves and a merge, and the direct moves
	   take 5 cycles.  */
	else if (INTEGRAL_TYPE_P (elem_type))
	  {
	    if (TARGET_P9_VECTOR)
	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
	    else
	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
	  }
	else
	  /* V2DFmode doesn't need a direct move.  */
	  return 2;

      default:
	gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.preferred_simd_mode.  */

static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
{
  opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));

  if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
    return vmode.require ();

  return word_mode;
}
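/* For example, SFmode asks mode_for_vector for 16 / 4 == 4 lanes and
   yields V4SFmode whenever an Altivec/VSX unit is enabled; if no 16-byte
   vector mode exists, or vector memory support is "none", word_mode is
   returned and the loop is left scalar.  */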
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;
  unsigned cost[3];
} rs6000_cost_data;
/* Test for likely overcommitment of vector hardware resources.  If a
   loop iteration is relatively large, and too large a percentage of
   instructions in the loop are vectorized, the cost model may not
   adequately reflect delays from unavailable vector resources.
   Penalize the loop body cost for this case.  */

static void
rs6000_density_test (rs6000_cost_data *data)
{
  const int DENSITY_PCT_THRESHOLD = 85;
  const int DENSITY_SIZE_THRESHOLD = 70;
  const int DENSITY_PENALTY = 10;
  struct loop *loop = data->loop_info;
  basic_block *bbs = get_loop_body (loop);
  int nbbs = loop->num_nodes;
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
  int vec_cost = data->cost[vect_body], not_vec_cost = 0;
  int i, density_pct;

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;

	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
	    not_vec_cost++;
	}
    }

  free (bbs);
  density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  if (density_pct > DENSITY_PCT_THRESHOLD
      && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
    {
      data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "density %d%%, cost %d exceeds threshold, penalizing "
			 "loop body cost by %d%%", density_pct,
			 vec_cost + not_vec_cost, DENSITY_PENALTY);
    }
}
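/* Worked example with hypothetical costs: vec_cost == 90 and
   not_vec_cost == 20 give density_pct == 9000 / 110 == 81, under the 85%
   threshold, so no penalty.  With vec_cost == 180 and not_vec_cost == 20,
   density_pct == 90 and the total size 200 exceeds 70, so the body cost
   becomes 180 * 110 / 100 == 198, i.e. a 10% penalty.  */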
/* Implement targetm.vectorize.init_cost.  */

/* For each vectorized loop, this var holds TRUE iff a non-memory vector
   instruction is needed by the vectorization.  */
static bool rs6000_vect_nonmem;

static void *
rs6000_init_cost (struct loop *loop_info)
{
  rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
  data->loop_info = loop_info;
  data->cost[vect_prologue] = 0;
  data->cost[vect_body] = 0;
  data->cost[vect_epilogue] = 0;
  rs6000_vect_nonmem = false;
  return data;
}
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statements, we would like to further fine-grain tweak the cost on
   top of rs6000_builtin_vectorization_cost handling, which doesn't have any
   information on statement operation codes etc.  One typical case here is
   COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
   for scalar cost, but it should be priced higher whether it is transformed
   into compare + branch or compare + isel instructions.  */

static unsigned
rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
				  struct _stmt_vec_info *stmt_info)
{
  if (kind == scalar_stmt && stmt_info && stmt_info->stmt
      && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
      if (subcode == COND_EXPR)
	return 2;
    }

  return 0;
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
		      enum vect_cost_for_stmt kind,
		      struct _stmt_vec_info *stmt_info, tree vectype,
		      int misalign, enum vect_cost_model_location where)
{
  rs6000_cost_data *cost_data = (rs6000_cost_data *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
							 misalign);
      stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info
	  && stmt_in_inner_loop_p (vinfo, stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost_data->cost[where] += retval;

      /* Check whether we're doing something other than just a copy loop.
	 Not all such loops may be profitably vectorized; see
	 rs6000_finish_cost.  */
      if ((kind == vec_to_scalar || kind == vec_perm
	   || kind == vec_promote_demote || kind == vec_construct
	   || kind == scalar_to_vec)
	  || (where == vect_body && kind == vector_stmt))
	rs6000_vect_nonmem = true;
    }

  return retval;
}
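/* Worked example (hypothetical statement): a vec_perm in the vectorized
   body costs 3 on a Power7 tuning (see
   rs6000_builtin_vectorization_cost); if it also sits in an inner loop
   relative to the loop being vectorized, count is scaled by 50, so a
   single such statement adds 150 to cost_data->cost[vect_body].  */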
/* For some target-specific vectorization costs which can't be handled per
   stmt, we check the requisite conditions and adjust the vectorization cost
   accordingly if satisfied.  One typical example is to model shift cost for
   vector with length by counting the number of required lengths under
   condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */

static void
rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
{
  struct loop *loop = data->loop_info;
  gcc_assert (loop);
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);

  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
    {
      rgroup_controls *rgc;
      unsigned int num_vectors_m1;
      unsigned int shift_cnt = 0;
      FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
	if (rgc->type)
	  /* Each length needs one shift to fill into bits 0-7.  */
	  shift_cnt += num_vectors_m1 + 1;

      rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
			    NULL, NULL_TREE, 0, vect_body);
    }
}
/* Implement targetm.vectorize.finish_cost.  */

static void
rs6000_finish_cost (void *data, unsigned *prologue_cost,
		    unsigned *body_cost, unsigned *epilogue_cost)
{
  rs6000_cost_data *cost_data = (rs6000_cost_data *) data;

  if (cost_data->loop_info)
    {
      rs6000_adjust_vect_cost_per_loop (cost_data);
      rs6000_density_test (cost_data);
    }

  /* Don't vectorize minimum-vectorization-factor, simple copy loops
     that require versioning for any reason.  The vectorization is at
     best a wash inside the loop, and the versioning checks make
     profitability highly unlikely and potentially quite harmful.  */
  if (cost_data->loop_info)
    {
      loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
      if (!rs6000_vect_nonmem
	  && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
	  && LOOP_REQUIRES_VERSIONING (vec_info))
	cost_data->cost[vect_body] += 10000;
    }

  *prologue_cost = cost_data->cost[vect_prologue];
  *body_cost = cost_data->cost[vect_body];
  *epilogue_cost = cost_data->cost[vect_epilogue];
}
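/* Example of the guard above (hypothetical loop): a vectorization factor 2
   memcpy-style loop that needs alias versioning never sets
   rs6000_vect_nonmem in rs6000_add_stmt_cost, so its body cost is inflated
   by 10000 and the vectorizer's profitability check rejects it.  */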
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
/* Implement targetm.loop_unroll_adjust.  */

static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  if (unroll_only_small_loops)
    {
      /* TODO: These are hardcoded values right now.  We probably should use
	 a PARAM here.  */
      if (loop->ninsns <= 6)
	return MIN (4, nunroll);
      if (loop->ninsns <= 10)
	return MIN (2, nunroll);

      return 0;
    }

  return nunroll;
}
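/* Worked example: under -munroll-only-small-loops with a requested factor
   nunroll == 8, a 5-instruction loop is capped at MIN (4, 8) == 4, an
   8-instruction loop at MIN (2, 8) == 2, and loops of more than 10
   instructions are not unrolled at all.  */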
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
				   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_ATAN2:
    CASE_CFN_HYPOT:
    CASE_CFN_POW:
      n_args = 2;
      gcc_fallthrough ();

    CASE_CFN_ACOS:
    CASE_CFN_ACOSH:
    CASE_CFN_ASIN:
    CASE_CFN_ASINH:
    CASE_CFN_ATAN:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_COS:
    CASE_CFN_COSH:
    CASE_CFN_ERF:
    CASE_CFN_ERFC:
    CASE_CFN_EXP2:
    CASE_CFN_EXP:
    CASE_CFN_EXPM1:
    CASE_CFN_LGAMMA:
    CASE_CFN_LOG10:
    CASE_CFN_LOG1P:
    CASE_CFN_LOG2:
    CASE_CFN_LOG:
    CASE_CFN_SIN:
    CASE_CFN_SINH:
    CASE_CFN_SQRT:
    CASE_CFN_TAN:
    CASE_CFN_TANH:
      if (el_mode == DFmode && n == 2)
	{
	  bdecl = mathfn_built_in (double_type_node, fn);
	  suffix = "d2";	/* pow -> powd2 */
	}
      else if (el_mode == SFmode && n == 4)
	{
	  bdecl = mathfn_built_in (float_type_node, fn);
	  suffix = "4";		/* powf -> powf4 */
	}
      else
	return NULL_TREE;
      if (!bdecl)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
  if (!bname)
    return NULL_TREE;

  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  else
    gcc_unreachable ();

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
				    tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
	     combined_fn_name (combined_fn (fn)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    CASE_CFN_COPYSIGN:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
      break;

    CASE_CFN_CEIL:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
      break;

    CASE_CFN_FLOOR:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
      break;

    CASE_CFN_FMA:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
      break;

    CASE_CFN_TRUNC:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
      break;

    CASE_CFN_NEARBYINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
      break;

    CASE_CFN_RINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && !flag_trapping_math
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && !flag_trapping_math
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
      break;

    default:
      break;
    }

  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
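
/* Illustrative mapping performed by the switch above: a vectorizable
   copysign on two doubles (CFN_COPYSIGN with V2DFmode in and out) resolves
   to rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP] (the xvcpsgndp insn), while
   ceil/floor/trunc map onto the xvrdpip/xvrdpim/xvrdpiz round family.  */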
/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */

static tree
rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
				       tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  enum rs6000_builtins fn
    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  switch (fn)
    {
    case RS6000_BUILTIN_RSQRTF:
      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
      break;

    case RS6000_BUILTIN_RSQRT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
      break;

    case RS6000_BUILTIN_RECIPF:
      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
      break;

    case RS6000_BUILTIN_RECIP:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
      break;

    default:
      break;
    }

  return NULL_TREE;
}
/* Default CPU string for rs6000*_file_start functions.  */
static const char *rs6000_default_cpu;

#ifdef USING_ELFOS_H
const char *rs6000_machine;

const char *
rs6000_machine_from_flags (void)
{
  HOST_WIDE_INT flags = rs6000_isa_flags;

  /* Disable the flags that should never influence the .machine selection.  */
  flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);

  if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
    return "power10";
  if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
    return "power9";
  if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
    return "power8";
  if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
    return "power7";
  if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
    return "power6";
  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
    return "power5";
  if ((flags & ISA_2_1_MASKS) != 0)
    return "power4";
  if ((flags & OPTION_MASK_POWERPC64) != 0)
    return "ppc64";
  return "ppc";
}

void
emit_asm_machine (void)
{
  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
}
#endif
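
/* Example of the directive emitted above (assuming an ELF target where
   rs6000_machine_from_flags saw the ISA 3.0 server flags, e.g. -mcpu=power9):

	.machine power9

   The server mask sets are nested, so a test such as
   (flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) isolates exactly
   the options that are new at that ISA level.  */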
/* Do anything needed at the start of the asm file.  */

static void
rs6000_file_start (void)
{
  char buffer[80];
  const char *start = buffer;
  FILE *file = asm_out_file;

  rs6000_default_cpu = TARGET_CPU_DEFAULT;

  default_file_start ();

  if (flag_verbose_asm)
    {
      sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);

      if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
	{
	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
	  start = "";
	}

      if (global_options_set.x_rs6000_cpu_index)
	{
	  fprintf (file, "%s -mcpu=%s", start,
		   processor_target_table[rs6000_cpu_index].name);
	  start = "";
	}

      if (global_options_set.x_rs6000_tune_index)
	{
	  fprintf (file, "%s -mtune=%s", start,
		   processor_target_table[rs6000_tune_index].name);
	  start = "";
	}

      if (PPC405_ERRATUM77)
	{
	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
	  start = "";
	}

#ifdef USING_ELFOS_H
      switch (rs6000_sdata)
	{
	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
	}

      if (rs6000_sdata && g_switch_value)
	{
	  fprintf (file, "%s -G %d", start,
		   g_switch_value);
	  start = "";
	}
#endif

      if (*start == '\0')
	putc ('\n', file);
    }

#ifdef USING_ELFOS_H
  rs6000_machine = rs6000_machine_from_flags ();
  emit_asm_machine ();
#endif

  if (DEFAULT_ABI == ABI_ELFv2)
    fprintf (file, "\t.abiversion 2\n");
}
/* Return nonzero if this function is known to have a null epilogue.  */

int
direct_return (void)
{
  if (reload_completed)
    {
      rs6000_stack_t *info = rs6000_stack_info ();

      if (info->first_gp_reg_save == 32
	  && info->first_fp_reg_save == 64
	  && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
	  && ! info->lr_save_p
	  && ! info->cr_save_p
	  && info->vrsave_size == 0
	  && ! info->push_p)
	return 1;
    }

  return 0;
}
/* Helper for num_insns_constant.  Calculate number of instructions to
   load VALUE to a single gpr using combinations of addi, addis, ori,
   oris, sldi and rldimi instructions.  */

static int
num_insns_constant_gpr (HOST_WIDE_INT value)
{
  /* signed constant loadable with addi */
  if (SIGNED_INTEGER_16BIT_P (value))
    return 1;

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
	   && (value >> 31 == -1 || value >> 31 == 0))
    return 1;

  /* PADDI can support up to 34 bit signed integers.  */
  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
    return 1;

  else if (TARGET_POWERPC64)
    {
      HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
      HOST_WIDE_INT high = value >> 31;

      if (high == 0 || high == -1)
	return 2;

      high >>= 1;

      if (low == 0 || low == high)
	return num_insns_constant_gpr (high) + 1;
      else if (high == 0)
	return num_insns_constant_gpr (low) + 1;
      else
	return (num_insns_constant_gpr (high)
		+ num_insns_constant_gpr (low) + 1);
    }

  else
    return 2;
}
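
/* A worked example of the recursion above (values illustrative): loading
   (HOST_WIDE_INT) 1 << 32 on a 64-bit target costs 2 instructions,

	li   9,1	# num_insns_constant_gpr (high) == 1
	sldi 9,9,32	# the "+ 1" shift/insert step

   because the sign-extended low word is 0 and only the high word must be
   materialized.  */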
/* Helper for num_insns_constant.  Allow constants formed by the
   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
   and handle modes that require multiple gprs.  */

static int
num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
{
  int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  int total = 0;

  while (nregs-- > 0)
    {
      HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
      int insns = num_insns_constant_gpr (low);
      if (insns > 2
	  /* We won't get more than 2 from num_insns_constant_gpr
	     except when TARGET_POWERPC64 and mode is DImode or
	     wider, so the register mode must be DImode.  */
	  && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
	insns = 2;
      total += insns;
      /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
	 it all at once would be UB.  */
      value >>= (BITS_PER_WORD - 1);
      value >>= 1;
    }

  return total;
}
/* Return the number of instructions it takes to form a constant in as
   many gprs are needed for MODE.  */

int
num_insns_constant (rtx op, machine_mode mode)
{
  HOST_WIDE_INT val;

  switch (GET_CODE (op))
    {
    case CONST_INT:
      val = INTVAL (op);
      break;

    case CONST_WIDE_INT:
      {
	int insns = 0;
	for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
	  insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
					     DImode);
	return insns;
      }

    case CONST_DOUBLE:
      {
	const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);

	if (mode == SFmode || mode == SDmode)
	  {
	    long l;

	    if (mode == SDmode)
	      REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
	    else
	      REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
	    /* See the first define_split in rs6000.md handling a
	       const_double_operand.  */
	    val = l;
	    mode = SImode;
	  }
	else if (mode == DFmode || mode == DDmode)
	  {
	    long l[2];

	    if (mode == DDmode)
	      REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
	    else
	      REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);

	    /* See the second (32-bit) and third (64-bit) define_split
	       in rs6000.md handling a const_double_operand.  */
	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
	    val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
	    mode = DImode;
	  }
	else if (mode == TFmode || mode == TDmode
		 || mode == KFmode || mode == IFmode)
	  {
	    long l[4];
	    int insns;

	    if (mode == TDmode)
	      REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
	    else
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);

	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
	    val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
	    insns = num_insns_constant_multi (val, DImode);
	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
	    val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
	    insns += num_insns_constant_multi (val, DImode);
	    return insns;
	  }
	else
	  gcc_unreachable ();
      }
      break;

    default:
      gcc_unreachable ();
    }

  return num_insns_constant_multi (val, mode);
}
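
/* Illustrative example: for the DFmode constant 1.0,
   REAL_VALUE_TO_TARGET_DOUBLE produces the image 0x3ff0000000000000, and
   num_insns_constant_multi counts 2 instructions on a 64-bit target:

	lis  9,0x3ff0	# addis-loadable high word 0x3ff00000
	sldi 9,9,32  */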
/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
   If the mode of OP is MODE_VECTOR_INT, this simply returns the
   corresponding element of the vector, but for V4SFmode, the
   corresponding "float" is interpreted as an SImode integer.  */

static HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
{
  rtx tmp;

  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
  gcc_assert (GET_MODE (op) != V2DImode
	      && GET_MODE (op) != V2DFmode);

  tmp = CONST_VECTOR_ELT (op, elt);
  if (GET_MODE (op) == V4SFmode)
    tmp = gen_lowpart (SImode, tmp);
  return INTVAL (tmp);
}
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */

static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i;
  unsigned nunits;
  unsigned bitsize;
  unsigned mask;

  HOST_WIDE_INT val;
  HOST_WIDE_INT splat_val;
  HOST_WIDE_INT msb_val;

  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
    return false;

  nunits = GET_MODE_NUNITS (mode);
  bitsize = GET_MODE_BITSIZE (inner);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  splat_val = val;
  msb_val = val >= 0 ? 0 : -1;

  /* Construct the value to be splatted, if possible.  If not, return 0.  */
  for (i = 2; i <= copies; i *= 2)
    {
      HOST_WIDE_INT small_val;
      bitsize /= 2;
      small_val = splat_val >> bitsize;
      mask >>= bitsize;
      if (splat_val != ((HOST_WIDE_INT)
			((unsigned HOST_WIDE_INT) small_val << bitsize)
			| (small_val & mask)))
	return false;
      splat_val = small_val;
    }

  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (splat_val))
    ;

  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, or if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
	   && (splat_val >= 0 || (step == 1 && copies == 1)))
    ;

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (splat_val, inner))
    ;

  else
    return false;

  /* Check if VAL is present in every STEP-th element, and the
     other elements are filled with its most significant bit.  */
  for (i = 1; i < nunits; ++i)
    {
      HOST_WIDE_INT desired_val;
      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
      if ((i & (step - 1)) == 0)
	desired_val = val;
      else
	desired_val = msb_val;

      if (desired_val != const_vector_elt_as_int (op, elt))
	return false;
    }

  return true;
}
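
/* Hedged examples of the STEP/COPIES encoding (constants illustrative):
   the V8HImode vector { 7, 7, ..., 7 } satisfies
   vspltis_constant (op, 1, 1) and is a plain vspltish 7, while the
   big-endian V8HImode pattern { 0, 7, 0, 7, ... } satisfies STEP == 2
   because it is exactly what vspltisw 7 leaves in each 32-bit lane.  */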
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
   instruction, filling in the bottom elements with 0 or -1.

   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
   for the number of zeroes to shift in, or negative for the number of 0xff
   bytes to shift in.

   OP is a CONST_VECTOR.  */

int
vspltis_shifted (rtx op)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i, j;
  unsigned nunits;
  unsigned mask;

  HOST_WIDE_INT val;

  if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
    return false;

  /* We need to create pseudo registers to do the shift, so don't recognize
     shift vector constants after reload.  */
  if (!can_create_pseudo_p ())
    return false;

  nunits = GET_MODE_NUNITS (mode);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);

  /* Check if the value can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (val))
    ;

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (val, inner))
    ;

  else
    return 0;

  /* Check if VAL is present in every STEP-th element until we find elements
     that are 0 or all 1 bits.  */
  for (i = 1; i < nunits; ++i)
    {
      unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
      HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);

      /* If the value isn't the splat value, check for the remaining elements
	 being 0/-1.  */
      if (val != elt_val)
	{
	  if (elt_val == 0)
	    {
	      for (j = i+1; j < nunits; ++j)
		{
		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
		  if (const_vector_elt_as_int (op, elt2) != 0)
		    return 0;
		}

	      return (nunits - i) * GET_MODE_SIZE (inner);
	    }

	  else if ((elt_val & mask) == mask)
	    {
	      for (j = i+1; j < nunits; ++j)
		{
		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
		    return 0;
		}

	      return -((nunits - i) * GET_MODE_SIZE (inner));
	    }

	  else
	    return 0;
	}
    }

  /* If all elements are equal, we don't need to do VSLDOI.  */
  return 0;
}
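
/* Illustrative results: on a big-endian target the V4SImode constant
   { 5, 0, 0, 0 } can be built as vspltisw 5 followed by a VSLDOI shifting in
   12 zero bytes, so the function returns (4 - 1) * 4 == 12; the pattern
   { 5, -1, -1, -1 } returns -12, asking for 0xff bytes instead.  */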
/* Return true if OP is of the given MODE and can be synthesized
   with a vspltisb, vspltish or vspltisw.  */

bool
easy_altivec_constant (rtx op, machine_mode mode)
{
  unsigned step, copies;

  if (mode == VOIDmode)
    mode = GET_MODE (op);
  else if (mode != GET_MODE (op))
    return false;

  /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
     constants.  */
  if (mode == V2DFmode)
    return zero_constant (op, mode);

  else if (mode == V2DImode)
    {
      if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
	  || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
	return false;

      if (zero_constant (op, mode))
	return true;

      if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
	return true;

      return false;
    }

  /* V1TImode is a special container for TImode.  Ignore for now.  */
  else if (mode == V1TImode)
    return false;

  /* Start with a vspltisw.  */
  step = GET_MODE_NUNITS (mode) / 4;
  copies = 1;

  if (vspltis_constant (op, step, copies))
    return true;

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return true;

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return true;

  if (vspltis_shifted (op) != 0)
    return true;

  return false;
}
/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
   result is OP.  Abort if it is not possible.  */

rtx
gen_easy_altivec_constant (rtx op)
{
  machine_mode mode = GET_MODE (op);
  int nunits = GET_MODE_NUNITS (mode);
  rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  unsigned step = nunits / 4;
  unsigned copies = 1;

  /* Start with a vspltisw.  */
  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));

  gcc_unreachable ();
}
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).

   Return the number of instructions needed (1 or 2) into the address pointed
   to by NUM_INSNS_PTR.

   Return the constant that is being split via CONSTANT_PTR.  */

bool
xxspltib_constant_p (rtx op,
		     machine_mode mode,
		     int *num_insns_ptr,
		     int *constant_ptr)
{
  size_t nunits = GET_MODE_NUNITS (mode);
  size_t i;
  HOST_WIDE_INT value;
  rtx element;

  /* Set the returned values to out of bound values.  */
  *num_insns_ptr = -1;
  *constant_ptr = 256;

  if (!TARGET_P9_VECTOR)
    return false;

  if (mode == VOIDmode)
    mode = GET_MODE (op);

  else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
    return false;

  /* Handle (vec_duplicate <constant>).  */
  if (GET_CODE (op) == VEC_DUPLICATE)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = XEXP (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;
    }

  /* Handle (const_vector [...]).  */
  else if (GET_CODE (op) == CONST_VECTOR)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = CONST_VECTOR_ELT (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;

      for (i = 1; i < nunits; i++)
	{
	  element = CONST_VECTOR_ELT (op, i);
	  if (!CONST_INT_P (element))
	    return false;

	  if (value != INTVAL (element))
	    return false;
	}
    }

  /* Handle integer constants being loaded into the upper part of the VSX
     register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
  else if (CONST_INT_P (op))
    {
      if (!SCALAR_INT_MODE_P (mode))
	return false;

      value = INTVAL (op);
      if (!IN_RANGE (value, -128, 127))
	return false;

      if (!IN_RANGE (value, -1, 0))
	{
	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
	    return false;

	  if (EASY_VECTOR_15 (value))
	    return false;
	}
    }

  else
    return false;

  /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
     sign extend.  Special case 0/-1 to allow getting any VSX register instead
     of an Altivec register.  */
  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
      && EASY_VECTOR_15 (value))
    return false;

  /* Return # of instructions and the constant byte for XXSPLTIB.  */
  if (mode == V16QImode)
    *num_insns_ptr = 1;

  else if (IN_RANGE (value, -1, 0))
    *num_insns_ptr = 1;

  else
    *num_insns_ptr = 2;

  *constant_ptr = (int) value;
  return true;
}
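
/* Hedged usage sketch (values illustrative): splatting 100 across V16QImode
   needs a single xxspltib, so *NUM_INSNS_PTR is 1; splatting 100 across
   V8HImode needs xxspltib plus a sign-extending unpack (vupkhsb), so
   *NUM_INSNS_PTR is 2.  In both cases *CONSTANT_PTR is 100.  */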
const char *
output_vec_const_move (rtx *operands)
{
  int shift;
  machine_mode mode;
  rtx dest, vec;

  dest = operands[0];
  vec = operands[1];
  mode = GET_MODE (dest);

  if (TARGET_VSX)
    {
      bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
      int xxspltib_value = 256;
      int num_insns = -1;

      if (zero_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,0";

	  else if (dest_vmx_p)
	    return "vspltisw %0,0";

	  else
	    return "xxlxor %x0,%x0,%x0";
	}

      if (all_ones_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,255";

	  else if (dest_vmx_p)
	    return "vspltisw %0,-1";

	  else if (TARGET_P8_VECTOR)
	    return "xxlorc %x0,%x0,%x0";

	  else
	    gcc_unreachable ();
	}

      if (TARGET_P9_VECTOR
	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
	{
	  if (num_insns == 1)
	    {
	      operands[2] = GEN_INT (xxspltib_value & 0xff);
	      return "xxspltib %x0,%2";
	    }

	  return "#";
	}
    }

  if (TARGET_ALTIVEC)
    {
      rtx splat_vec;

      gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
      if (zero_constant (vec, mode))
	return "vspltisw %0,0";

      if (all_ones_constant (vec, mode))
	return "vspltisw %0,-1";

      /* Do we need to construct a value using VSLDOI?  */
      shift = vspltis_shifted (vec);
      if (shift != 0)
	return "#";

      splat_vec = gen_easy_altivec_constant (vec);
      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
      operands[1] = XEXP (splat_vec, 0);
      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
	return "#";

      switch (GET_MODE (splat_vec))
	{
	case E_V4SImode:
	  return "vspltisw %0,%1";

	case E_V8HImode:
	  return "vspltish %0,%1";

	case E_V16QImode:
	  return "vspltisb %0,%1";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_unreachable ();
}
/* Initialize vector TARGET to VALS.  */

void
rs6000_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  unsigned int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  rtx x, mem;
  size_t i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
      if ((int_vector_p || TARGET_VSX) && all_const_zero)
	{
	  /* Zero register.  */
	  emit_move_insn (target, CONST0_RTX (mode));
	  return;
	}
      else if (int_vector_p && easy_vector_constant (const_vec, mode))
	{
	  /* Splat immediate.  */
	  emit_insn (gen_rtx_SET (target, const_vec));
	  return;
	}
      else
	{
	  /* Load from constant pool.  */
	  emit_move_insn (target, const_vec);
	  return;
	}
    }

  /* Double word values on VSX can use xxpermdi or lxvdsx.  */
  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
    {
      rtx op[2];
      size_t num_elements = all_same ? 1 : 2;
      for (i = 0; i < num_elements; i++)
	{
	  op[i] = XVECEXP (vals, 0, i);
	  /* Just in case there is a SUBREG with a smaller mode, do a
	     conversion.  */
	  if (GET_MODE (op[i]) != inner_mode)
	    {
	      rtx tmp = gen_reg_rtx (inner_mode);
	      convert_move (tmp, op[i], 0);
	      op[i] = tmp;
	    }
	  /* Allow load with splat double word.  */
	  else if (MEM_P (op[i]))
	    {
	      if (!all_same)
		op[i] = force_reg (inner_mode, op[i]);
	    }
	  else if (!REG_P (op[i]))
	    op[i] = force_reg (inner_mode, op[i]);
	}

      if (all_same)
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
	  else
	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
	}
      else
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
	  else
	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
	}
      return;
    }

  /* Special case initializing vector int if we are on 64-bit systems with
     direct move or we have the ISA 3.0 instructions.  */
  if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
      && TARGET_DIRECT_MOVE_64BIT)
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);
	  if (MEM_P (element0))
	    element0 = rs6000_force_indexed_or_indirect_mem (element0);
	  else
	    element0 = force_reg (SImode, element0);

	  if (TARGET_P9_VECTOR)
	    emit_insn (gen_vsx_splat_v4si (target, element0));
	  else
	    {
	      rtx tmp = gen_reg_rtx (DImode);
	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
	    }
	  return;
	}
      else
	{
	  rtx elements[4];

	  for (i = 0; i < 4; i++)
	    elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));

	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
					elements[2], elements[3]));
	  return;
	}
    }

  /* With single precision floating point on VSX, know that internally single
     precision is actually represented as a double, and either make 2 V2DF
     vectors, and convert these vectors to single precision, or do one
     conversion, and splat the result to the other elements.  */
  if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);

	  if (TARGET_P9_VECTOR)
	    {
	      if (MEM_P (element0))
		element0 = rs6000_force_indexed_or_indirect_mem (element0);

	      emit_insn (gen_vsx_splat_v4sf (target, element0));
	    }

	  else
	    {
	      rtx freg = gen_reg_rtx (V4SFmode);
	      rtx sreg = force_reg (SFmode, element0);
	      rtx cvt = (TARGET_XSCVDPSPN
			 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
			 : gen_vsx_xscvdpsp_scalar (freg, sreg));

	      emit_insn (cvt);
	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
						      const0_rtx));
	    }
	}
      else
	{
	  if (TARGET_P8_VECTOR && TARGET_POWERPC64)
	    {
	      rtx tmp_sf[4];
	      rtx tmp_si[4];
	      rtx tmp_di[4];
	      rtx mrg_di[4];
	      for (i = 0; i < 4; i++)
		{
		  tmp_si[i] = gen_reg_rtx (SImode);
		  tmp_di[i] = gen_reg_rtx (DImode);
		  mrg_di[i] = gen_reg_rtx (DImode);
		  tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
		  emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
		  emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
		}

	      if (!BYTES_BIG_ENDIAN)
		{
		  std::swap (tmp_di[0], tmp_di[1]);
		  std::swap (tmp_di[2], tmp_di[3]);
		}

	      emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
	      emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
	      emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
	      emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));

	      rtx tmp_v2di = gen_reg_rtx (V2DImode);
	      emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
	      emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
	    }
	  else
	    {
	      rtx dbl_even = gen_reg_rtx (V2DFmode);
	      rtx dbl_odd  = gen_reg_rtx (V2DFmode);
	      rtx flt_even = gen_reg_rtx (V4SFmode);
	      rtx flt_odd  = gen_reg_rtx (V4SFmode);
	      rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
	      rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
	      rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
	      rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));

	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	      rs6000_expand_extract_even (target, flt_even, flt_odd);
	    }
	}
      return;
    }

  /* Special case initializing vector short/char that are splats if we are on
     64-bit systems with direct move.  */
  if (all_same && TARGET_DIRECT_MOVE_64BIT
      && (mode == V16QImode || mode == V8HImode))
    {
      rtx op0 = XVECEXP (vals, 0, 0);
      rtx di_tmp = gen_reg_rtx (DImode);

      if (!REG_P (op0))
	op0 = force_reg (GET_MODE_INNER (mode), op0);

      if (mode == V16QImode)
	{
	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
	  return;
	}

      if (mode == V8HImode)
	{
	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
	  return;
	}
    }

  /* Store value to stack temp.  Load vector element.  Splat.  However, splat
     of 64-bit items is not supported on Altivec.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
      emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
		      XVECEXP (vals, 0, 0));
      x = gen_rtx_UNSPEC (VOIDmode,
			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
      emit_insn (gen_rtx_PARALLEL (VOIDmode,
				   gen_rtvec (2,
					      gen_rtx_SET (target, mem),
					      x)));
      x = gen_rtx_VEC_SELECT (inner_mode, target,
			      gen_rtx_PARALLEL (VOIDmode,
						gen_rtvec (1, const0_rtx)));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite
     varying field.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      rs6000_expand_vector_init (target, copy);

      /* Insert variable.  */
      rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
				GEN_INT (one_var));
      return;
    }

  if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
    {
      rtx op[16];
      /* Force the values into word_mode registers.  */
      for (i = 0; i < n_elts; i++)
	{
	  rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
	  machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
	  op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
	}

      /* Take unsigned char big endianness on 64bit as example for below
	 construction, the input values are: A, B, C, D, ..., O, P.  */

      if (TARGET_DIRECT_MOVE_128)
	{
	  /* Move to VSX register with vec_concat, each has 2 values.
	     eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
		 vr1[1] = { xxxxxxxC, xxxxxxxD };
		 ...
		 vr1[7] = { xxxxxxxO, xxxxxxxP };  */
	  rtx vr1[8];
	  for (i = 0; i < n_elts / 2; i++)
	    {
	      vr1[i] = gen_reg_rtx (V2DImode);
	      emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
					      op[i * 2 + 1]));
	    }

	  /* Pack vectors with 2 values into vectors with 4 values.
	     eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
		 vr2[1] = { xxxExxxF, xxxGxxxH };
		 vr2[2] = { xxxIxxxJ, xxxKxxxL };
		 vr2[3] = { xxxMxxxN, xxxOxxxP };  */
	  rtx vr2[4];
	  for (i = 0; i < n_elts / 4; i++)
	    {
	      vr2[i] = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
					      vr1[i * 2 + 1]));
	    }

	  /* Pack vectors with 4 values into vectors with 8 values.
	     eg: vr3[0] = { xAxBxCxD, xExFxGxH };
		 vr3[1] = { xIxJxKxL, xMxNxOxP };  */
	  rtx vr3[2];
	  for (i = 0; i < n_elts / 8; i++)
	    {
	      vr3[i] = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
					      vr2[i * 2 + 1]));
	    }

	  /* If it's V8HImode, it's done and return it.  */
	  if (mode == V8HImode)
	    {
	      emit_insn (gen_rtx_SET (target, vr3[0]));
	      return;
	    }

	  /* Pack vectors with 8 values into 16 values.  */
	  rtx res = gen_reg_rtx (V16QImode);
	  emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
	  emit_insn (gen_rtx_SET (target, res));
	}
      else
	{
	  rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
	  rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
	  rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
	  rtx perm_idx;

	  /* Set up some common gen routines and values.  */
	  if (BYTES_BIG_ENDIAN)
	    {
	      if (mode == V16QImode)
		{
		  merge_v16qi = gen_altivec_vmrghb;
		  merge_v8hi = gen_altivec_vmrglh;
		}
	      else
		merge_v8hi = gen_altivec_vmrghh;
	      merge_v4si = gen_altivec_vmrglw;
	      perm_idx = GEN_INT (3);
	    }
	  else
	    {
	      if (mode == V16QImode)
		{
		  merge_v16qi = gen_altivec_vmrglb;
		  merge_v8hi = gen_altivec_vmrghh;
		}
	      else
		merge_v8hi = gen_altivec_vmrglh;
	      merge_v4si = gen_altivec_vmrghw;
	      perm_idx = GEN_INT (0);
	    }

	  /* Move to VSX register with direct move.
	     eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
		 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
		 ...
		 vr_qi[15] = { xxxxxxxP, xxxxxxxx };  */
	  rtx vr_qi[16];
	  for (i = 0; i < n_elts; i++)
	    {
	      vr_qi[i] = gen_reg_rtx (V16QImode);
	      if (TARGET_POWERPC64)
		emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
	      else
		emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
	    }

	  /* Merge/move to vector short.
	     eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
		 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
		 ...
		 vr_hi[7] = { xxxxxxxx, xxxxxxOP };  */
	  rtx vr_hi[8];
	  for (i = 0; i < 8; i++)
	    {
	      rtx tmp = vr_qi[i];
	      if (mode == V16QImode)
		{
		  tmp = gen_reg_rtx (V16QImode);
		  emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
		}
	      vr_hi[i] = gen_reg_rtx (V8HImode);
	      emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
	    }

	  /* Merge vector short to vector int.
	     eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
		 vr_si[1] = { xxxxxxxx, xxxxEFGH };
		 ...
		 vr_si[3] = { xxxxxxxx, xxxxMNOP };  */
	  rtx vr_si[4];
	  for (i = 0; i < 4; i++)
	    {
	      rtx tmp = gen_reg_rtx (V8HImode);
	      emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
	      vr_si[i] = gen_reg_rtx (V4SImode);
	      emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
	    }

	  /* Merge vector int to vector long.
	     eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
		 vr_di[1] = { xxxxxxxx, IJKLMNOP };  */
	  rtx vr_di[2];
	  for (i = 0; i < 2; i++)
	    {
	      rtx tmp = gen_reg_rtx (V4SImode);
	      emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
	      vr_di[i] = gen_reg_rtx (V2DImode);
	      emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
	    }

	  rtx res = gen_reg_rtx (V2DImode);
	  emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
	  emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
	}

      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Set field ELT_RTX of TARGET to VAL.  */

void
rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx reg = gen_reg_rtx (mode);
  rtx mask, mem, x;
  int width = GET_MODE_SIZE (inner_mode);
  int i;

  val = force_reg (GET_MODE (val), val);

  if (VECTOR_MEM_VSX_P (mode))
    {
      rtx insn = NULL_RTX;

      if (mode == V2DFmode)
	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);

      else if (mode == V2DImode)
	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);

      else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
	{
	  if (mode == V4SImode)
	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
	  else if (mode == V8HImode)
	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
	  else if (mode == V16QImode)
	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
	  else if (mode == V4SFmode)
	    insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
	}

      if (insn)
	{
	  emit_insn (insn);
	  return;
	}
    }

  gcc_assert (CONST_INT_P (elt_rtx));

  /* Simplify setting single element vectors like V1TImode.  */
  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
      && INTVAL (elt_rtx) == 0)
    {
      emit_move_insn (target, gen_lowpart (mode, val));
      return;
    }

  /* Load single variable value.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
  x = gen_rtx_UNSPEC (VOIDmode,
		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2,
					  gen_rtx_SET (reg, mem),
					  x)));

  /* Linear sequence.  */
  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (mask, 0, i) = GEN_INT (i);

  /* Set permute mask to insert element into target.  */
  for (i = 0; i < width; ++i)
    XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));

  if (BYTES_BIG_ENDIAN)
    x = gen_rtx_UNSPEC (mode,
			gen_rtvec (3, target, reg,
				   force_reg (V16QImode, x)),
			UNSPEC_VPERM);
  else
    {
      if (TARGET_P9_VECTOR)
	x = gen_rtx_UNSPEC (mode,
			    gen_rtvec (3, reg, target,
				       force_reg (V16QImode, x)),
			    UNSPEC_VPERMR);
      else
	{
	  /* Invert selector.  We prefer to generate VNAND on P8 so
	     that future fusion opportunities can kick in, but must
	     generate VNOR elsewhere.  */
	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
	  rtx iorx = (TARGET_P8_VECTOR
		      ? gen_rtx_IOR (V16QImode, notx, notx)
		      : gen_rtx_AND (V16QImode, notx, notx));
	  rtx tmp = gen_reg_rtx (V16QImode);
	  emit_insn (gen_rtx_SET (tmp, iorx));

	  /* Permute with operands reversed and adjusted selector.  */
	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
			      UNSPEC_VPERM);
	}
    }

  emit_insn (gen_rtx_SET (target, x));
}
/* Extract field ELT from VEC into TARGET.  */

void
rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
{
  machine_mode mode = GET_MODE (vec);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx mem;

  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
    {
      switch (mode)
	{
	default:
	  break;
	case E_V1TImode:
	  emit_move_insn (target, gen_lowpart (TImode, vec));
	  return;
	case E_V2DFmode:
	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
	  return;
	case E_V2DImode:
	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
	  return;
	case E_V4SFmode:
	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
	  return;
	case E_V16QImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
	      return;
	    }
	  else
	    break;
	case E_V8HImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
	      return;
	    }
	  else
	    break;
	case E_V4SImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
	      return;
	    }
	  break;
	}
    }
  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
	   && TARGET_DIRECT_MOVE_64BIT)
    {
      if (GET_MODE (elt) != DImode)
	{
	  rtx tmp = gen_reg_rtx (DImode);
	  convert_move (tmp, elt, 0);
	  elt = tmp;
	}
      else if (!REG_P (elt))
	elt = force_reg (DImode, elt);

      switch (mode)
	{
	case E_V1TImode:
	  emit_move_insn (target, gen_lowpart (TImode, vec));
	  return;

	case E_V2DFmode:
	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
	  return;

	case E_V4SFmode:
	  emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
	  return;

	case E_V4SImode:
	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
	  return;

	case E_V8HImode:
	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
	  return;

	case E_V16QImode:
	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  /* Allocate mode-sized buffer.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

  emit_move_insn (mem, vec);
  if (CONST_INT_P (elt))
    {
      int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);

      /* Add offset to field within buffer matching vector element.  */
      mem = adjust_address_nv (mem, inner_mode,
			       modulo_elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
    }
  else
    {
      unsigned int ele_size = GET_MODE_SIZE (inner_mode);
      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
      rtx new_addr = gen_reg_rtx (Pmode);

      elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
      if (ele_size > 1)
	elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
      new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
      new_addr = change_address (mem, inner_mode, new_addr);
      emit_move_insn (target, new_addr);
    }
}
/* Return the offset within a memory object (MEM) of a vector type to a given
   element within the vector (ELEMENT) with an element size (SCALAR_SIZE).  If
   the element is constant, we return a constant integer.

   Otherwise, we use a base register temporary to calculate the offset after
   masking it to fit within the bounds of the vector and scaling it.  The
   masking is required by the 64-bit ELF version 2 ABI for the vec_extract
   built-in function.  */

static rtx
get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
{
  if (CONST_INT_P (element))
    return GEN_INT (INTVAL (element) * scalar_size);

  /* All insns should use the 'Q' constraint (address is a single register) if
     the element number is not a constant.  */
  gcc_assert (satisfies_constraint_Q (mem));

  /* Mask the element to make sure the element number is between 0 and the
     maximum number of elements - 1 so that we don't generate an address
     outside the vector.  */
  rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
  rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
  emit_insn (gen_rtx_SET (base_tmp, and_op));

  /* Shift the element to get the byte offset from the element number.  */
  int shift = exact_log2 (scalar_size);
  gcc_assert (shift >= 0);

  if (shift > 0)
    {
      rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
      emit_insn (gen_rtx_SET (base_tmp, shift_op));
    }

  return base_tmp;
}
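
/* Hedged sketch of the two paths (register numbers illustrative): asking for
   element 3 of a V4SImode vector returns the constant GEN_INT (12); for a
   variable element number the emitted RTL is equivalent to

	base_tmp = element & 3;		// clamp to 0 .. nunits-1
	base_tmp = base_tmp << 2;	// scale by the 4-byte element size  */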
/* Helper function to update PC-relative addresses when we are adjusting a
   memory address (ADDR) to a vector to point to a scalar field within the
   vector with a constant offset (ELEMENT_OFFSET).  If the address is not
   valid, we can use the base register temporary (BASE_TMP) to form the
   address.  */

static rtx
adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
{
  rtx new_addr = NULL;

  gcc_assert (CONST_INT_P (element_offset));

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      if (CONST_INT_P (op1))
	{
	  HOST_WIDE_INT offset
	    = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);

	  if (offset == 0)
	    new_addr = op0;

	  else
	    {
	      rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
	      new_addr = gen_rtx_CONST (Pmode, plus);
	    }
	}

      else
	{
	  emit_move_insn (base_tmp, addr);
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
    {
      rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
      new_addr = gen_rtx_CONST (Pmode, plus);
    }

  else
    gcc_unreachable ();

  return new_addr;
}
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fixup the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).

   This function is expected to be called after reload is completed when we are
   splitting insns.  The temporary BASE_TMP might be set multiple times with
   this code.  */

rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx new_addr;

  gcc_assert (!reg_mentioned_p (base_tmp, addr));
  gcc_assert (!reg_mentioned_p (base_tmp, element));

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  */
  rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* For references to local static variables, fold a constant offset into the
     address.  */
  else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
    new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  /* op0 should never be r0, because r0+offset is not valid.  But it
	     doesn't hurt to make sure it is not r0.  */
	  gcc_assert (reg_or_subregno (op0) != 0);

	  /* D-FORM address with constant element number.  */
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);
	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	}
      else
	{
	  /* If we don't have a D-FORM address with a constant element number,
	     add the two elements in the current address.  Then add the offset.

	     Previously, we tried to add the offset to OP1 and change the
	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
	     complicated because we had to verify that op1 was not GPR0 and we
	     had a constant element offset (due to the way ADDI is defined).
	     By doing the add of OP0 and OP1 first, and then adding in the
	     offset, it has the benefit that if D-FORM instructions are
	     allowed, the offset is part of the memory access to the vector
	     element.  */
	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If the address isn't valid, move the address into the temporary base
     register.  Some reasons it could not be valid include:

     The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X_FORM loads can be done, and the address is D_FORM.  */

  enum insn_form iform
    = address_to_insn_form (new_addr, scalar_mode,
			    reg_to_non_prefixed (scalar_reg, scalar_mode));

  if (iform == INSN_FORM_BAD)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
/* Split a variable vec_extract operation into the component instructions.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    {
      emit_move_insn (dest,
		      rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
						 scalar_mode));
      return;
    }

  else if (REG_P (src) || SUBREG_P (src))
    {
      int num_elements = GET_MODE_NUNITS (mode);
      int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
      int bit_shift = 7 - exact_log2 (num_elements);
      rtx element2;
      unsigned int dest_regno = reg_or_subregno (dest);
      unsigned int src_regno = reg_or_subregno (src);
      unsigned int element_regno = reg_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (BYTES_BIG_ENDIAN
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }

	  else
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}

      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
      if (scalar_size == 8)
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      rtx num_ele_m1 = GEN_INT (num_elements - 1);

	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case E_V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case E_V4SFmode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));

	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case E_V4SImode:
	case E_V8HImode:
	case E_V16QImode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - bits_in_element)));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}
    }

  else
    gcc_unreachable ();
}
/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
	align = 128;
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}
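
/* Illustrative effect (assuming BITS_PER_WORD == 64): an ABI-mandated query
   for a 64-bit aligned "vector int" raises ALIGN to 128, while an optional
   query for "char buf[100]" raises ALIGN to 64 so that block copies can use
   full-word accesses.  */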
/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
   instructions simply ignore the low bits; VSX memory instructions
   are aligned to 4 or 8 bytes.  */

static bool
rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  return (STRICT_ALIGNMENT
	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
		      && (int) align < VECTOR_ALIGN (mode)))));
}
/* Previous GCC releases forced all vector types to have 16-byte alignment.  */

bool
rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
{
  if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
    {
      if (computed != 128)
	{
	  static bool warned;
	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the layout of aggregates containing vectors with"
		      " %d-byte alignment has changed in GCC 5",
		      computed / BITS_PER_UNIT);
	    }
	}
      /* In current GCC there is no special case.  */
      return false;
    }

  return false;
}
/* AIX increases natural record alignment to doubleword if the first
   field is an FP double while the FP fields remain word aligned.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);
  tree field = TYPE_FIELDS (type);

  /* Skip all non field decls */
  while (field != NULL
	 && (TREE_CODE (field) != FIELD_DECL
	     || DECL_FIELD_ABI_IGNORED (field)))
    field = DECL_CHAIN (field);

  if (field != NULL && field != type)
    {
      type = TREE_TYPE (field);
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);

      if (type != error_mark_node && TYPE_MODE (type) == DFmode)
	align = MAX (align, 64);
    }

  return align;
}
/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non field decls */
    while (field != NULL
	   && (TREE_CODE (field) != FIELD_DECL
	       || DECL_FIELD_ABI_IGNORED (field)))
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}
/* Return true if ADDR is an acceptable address for a quad memory operation of
   mode MODE (either LQ/STQ for general purpose registers, or LXV/STXV for
   vector registers under ISA 3.0).  STRICT selects strict address checking,
   as required after reload.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}
/* Given an address, return a constant offset term if one exists.  */

rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}
/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
   the mode.  If we can't find (or don't know) the alignment of the symbol
   we assume (optimistically) that it's sufficiently aligned [??? maybe we
   should be pessimistic].  Offsets are validated in the same way as for
   reg + offset addresses.  */

static bool
darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
{
  /* We should not get here with this.  */
  gcc_checking_assert (! mode_supports_dq_form (mode));

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Look through any Mach-O PIC offset wrapper.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
    x = XVECEXP (x, 0, 0);

  rtx sym = NULL_RTX;
  unsigned HOST_WIDE_INT offset = 0;

  if (GET_CODE (x) == PLUS)
    {
      sym = XEXP (x, 0);
      if (! SYMBOL_REF_P (sym))
	return false;
      if (!CONST_INT_P (XEXP (x, 1)))
	return false;
      offset = INTVAL (XEXP (x, 1));
    }
  else if (SYMBOL_REF_P (x))
    sym = x;
  else if (CONST_INT_P (x))
    offset = INTVAL (x);
  else if (GET_CODE (x) == LABEL_REF)
    offset = 0; // We assume code labels are Pmode aligned
  else
    return false; // not sure what we have here.

  /* If we don't know the alignment of the thing to which the symbol refers,
     we assume optimistically it is "enough".
     ??? maybe we should be pessimistic instead.  */
  unsigned align = 0;

  if (sym)
    {
      tree decl = SYMBOL_REF_DECL (sym);
      if (MACHO_SYMBOL_INDIRECTION_P (sym))
	/* The decl in an indirection symbol is the original one, which might
	   be less aligned than the indirection.  Our indirections are always
	   pointer-aligned.  */
	;
      else if (decl && DECL_ALIGN (decl))
	align = DECL_ALIGN_UNIT (decl);
    }

  unsigned int extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!TARGET_POWERPC64)
	extra = 4;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    default:
      break;
    }

  /* We only care if the access(es) would cause a change to the high part.  */
  offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
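
/* Illustrative sketch, not part of the port: the expression
   ((offset & 0xffff) ^ 0x8000) - 0x8000 used above (and in several of the
   functions that follow) sign-extends the low 16 bits of an offset, i.e.
   it yields the displacement that the low half of a high/lo_sum pair
   actually contributes.  */
#if 0 /* example only; compile separately */
#include <assert.h>

static long
sext16 (unsigned long offset)
{
  long low = (long) ((offset & 0xffff) ^ 0x8000);
  return low - 0x8000;
}

int
main (void)
{
  assert (sext16 (0x0000) == 0);
  assert (sext16 (0x7fff) == 0x7fff);
  assert (sext16 (0x8000) == -0x8000);	/* bit 15 set: negative */
  assert (sext16 (0xffff) == -1);
  assert (sext16 (0x12345) == 0x2345);	/* only the low 16 bits matter */
  return 0;
}
#endif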
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
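
/* Illustrative sketch, not part of the port: the (offset & 3) tests above
   reflect the DS instruction form (ld/std and friends), which encodes a
   14-bit immediate shifted left by two -- only offsets divisible by 4
   within the signed 16-bit range are representable.  The helper name below
   is hypothetical.  */
#if 0 /* example only; compile separately */
#include <assert.h>
#include <stdbool.h>

static bool
ds_form_offset_ok (long offset)
{
  return offset >= -32768 && offset <= 32767 && (offset & 3) == 0;
}

int
main (void)
{
  assert (ds_form_offset_ok (8));
  assert (!ds_form_offset_ok (6));	/* low two bits set */
  assert (!ds_form_offset_ok (32772));	/* does not fit in 16 bits */
  return 0;
}
#endif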
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
    case E_TFmode:
    case E_KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until
	 the ISA 3.0 vector d-form addressing mode was added.  While TImode
	 is not a vector mode, if we want to use the VSX registers to move
	 it around, we need to restrict ourselves to reg+reg addressing.
	 Similarly for IEEE 128-bit floating point that is passed in a
	 single vector register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_dq_form (mode);
      break;

      /* The vector pair/quad types support offset addressing if the
	 underlying vectors support offset addressing.  */
    case E_OOmode:
    case E_XOmode:
      return TARGET_MMA;

    case E_SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}
/* Return true if OP is a register, or a register plus constant offset,
   naming one of the virtual stack registers.  */

static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
/* Return true if a MODE sized memory access to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (!SYMBOL_REF_P (op))
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_dq_form (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()..  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
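
/* Illustrative sketch, not part of the port: offset & -offset, as used
   above, isolates the lowest set bit of OFFSET (negating a two's-complement
   value flips every bit above the lowest set bit).  The function then caps
   the usable alignment at that bit, since adding OFFSET can disturb any
   higher bit of the address.  */
#if 0 /* example only; compile separately */
#include <assert.h>

int
main (void)
{
  unsigned long offset;

  offset = 0x18;			/* 0b11000 */
  assert ((offset & -offset) == 0x8);
  offset = 0x40;
  assert ((offset & -offset) == 0x40);
  offset = 0;				/* no set bit: the trick yields 0 */
  assert ((offset & -offset) == 0);
  return 0;
}
#endif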
/* Return true if OP refers to a constant pool entry that will be output
   as a special TOC entry.  */

static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}
/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		 GET_RTX_NAME (GET_CODE (symbol)));
      debug_rtx (symbol);
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || (REG_P (XVECEXP (tocrel_base, 0, 1))
		  && offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
						  INTVAL (tocrel_offset),
						  mode))));
}
static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}
bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (mode_supports_dq_form (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (!CONST_INT_P (XEXP (x, 1)))
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  if (TARGET_PREFIXED)
    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
  else
    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
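
/* Illustrative sketch, not part of the port: a plausible reading of the
   SIGNED_16BIT_OFFSET_EXTRA_P test used above (the real macro lives in the
   target headers).  EXTRA is the number of bytes past the first word that a
   multi-register access touches, so both OFFSET and OFFSET + EXTRA must be
   representable as signed 16-bit displacements.  */
#if 0 /* example only; compile separately */
#include <assert.h>
#include <stdbool.h>

static bool
signed_16bit_offset_extra_p (long offset, long extra)
{
  return offset >= -32768 && offset + extra <= 32767;
}

int
main (void)
{
  assert (signed_16bit_offset_extra_p (32760, 0));
  assert (!signed_16bit_offset_extra_p (32760, 8));	/* last word wraps */
  assert (signed_16bit_offset_extra_p (-32768, 8));
  return 0;
}
#endif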
bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}
bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}
bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}
static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_dq_form (mode))
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF || TARGET_MACHO)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says opposite.  In most cases, LRA through different
	 transformations can generate correct code for address reloads.
	 It cannot manage only some LO_SUM cases.  So we need to add
	 code here saying that some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }

  return false;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_dq_form (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (SYMBOL_REF_P (x))
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case E_TFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
    case E_IFmode:
    case E_KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
      low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 gen_int_mode (high_int, Pmode)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && REG_P (XEXP (x, 0))
	   && !CONST_INT_P (XEXP (x, 1))
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC_OR_PCREL
	   && !flag_pic
	   && !CONST_INT_P (x)
	   && !CONST_WIDE_INT_P (x)
	   && !CONST_DOUBLE_P (x)
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (Pmode, reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && SYMBOL_REF_P (x)
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}
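
/* Worked example of the high/low split performed above: for reg + 0x12345
   the low part is the sign-extended bottom 16 bits (0x2345) and the high
   part is the remainder (0x10000), which fits an addis immediate.  When bit
   15 of the constant is set the low part goes negative and the high part is
   rounded up to compensate.  A standalone check of the arithmetic: */
#if 0 /* example only; compile separately */
#include <assert.h>

int
main (void)
{
  long c, low, high;

  c = 0x12345;
  low = (long) ((c & 0xffff) ^ 0x8000) - 0x8000;
  high = c - low;
  assert (low == 0x2345 && high == 0x10000 && high + low == c);
  assert ((high & 0xffff) == 0);	/* representable by addis */

  c = 0x18000;				/* bit 15 set: low part negative */
  low = (long) ((c & 0xffff) ^ 0x8000) - 0x8000;
  high = c - low;
  assert (low == -0x8000 && high == 0x20000 && high + low == c);
  return 0;
}
#endif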
/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  if (TARGET_ELF)
    fputs ("@dtprel+0x8000", file);
  else if (TARGET_XCOFF && SYMBOL_REF_P (x))
    {
      switch (SYMBOL_REF_TLS_MODEL (x))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
}
/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (SYMBOL_REF_P (x)
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
rs6000_delegitimize_address (rtx orig_x)
{
  rtx x, y, offset;

  if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
    orig_x = XVECEXP (orig_x, 0, 0);

  orig_x = delegitimize_mem_from_attrs (orig_x);

  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);

  y = x;
  if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
    y = XEXP (y, 1);

  offset = NULL_RTX;
  if (GET_CODE (y) == PLUS
      && GET_MODE (y) == Pmode
      && CONST_INT_P (XEXP (y, 1)))
    {
      offset = XEXP (y, 1);
      y = XEXP (y, 0);
    }

  if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
    {
      y = XVECEXP (y, 0, 0);

#ifdef HAVE_AS_TLS
      /* Do not associate thread-local symbols with the original
	 constant pool symbol.  */
      if (TARGET_XCOFF
	  && SYMBOL_REF_P (y)
	  && CONSTANT_POOL_ADDRESS_P (y)
	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
	return orig_x;
#endif

      if (offset != NULL_RTX)
	y = gen_rtx_PLUS (Pmode, y, offset);
      if (!MEM_P (orig_x))
	return y;
      else
	return replace_equiv_address_nv (orig_x, y);
    }

  if (TARGET_MACHO
      && GET_CODE (orig_x) == LO_SUM
      && GET_CODE (XEXP (orig_x, 1)) == CONST)
    {
      y = XEXP (XEXP (orig_x, 1), 0);
      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
	return XVECEXP (y, 0, 0);
    }

  return orig_x;
}
/* Return true if X shouldn't be emitted into the debug info.
   The linker doesn't like .toc section references from
   .debug_* sections, so reject .toc section symbols.  */

static bool
rs6000_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    return true;
  if (SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    {
      rtx c = get_pool_constant (x);
      machine_mode cmode = get_pool_mode (x);
      if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
	return true;
    }

  return false;
}
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
rs6000_legitimate_combined_insn (rtx_insn *insn)
{
  int icode = INSN_CODE (insn);

  /* Reject creating doloop insns.  Combine should not be allowed
     to create these for a number of reasons:
     1) In a nested loop, if combine creates one of these in an
     outer loop and the register allocator happens to allocate ctr
     to the outer loop insn, then the inner loop can't use ctr.
     Inner loops ought to be more highly optimized.
     2) Combine often wants to create one of these from what was
     originally a three insn sequence, first combining the three
     insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
     allocated ctr, the splitter takes us back to the three insn
     sequence.  It's better to stop combine at the two insn
     sequence.
     3) Faced with not being able to allocate ctr for ctrsi/ctrdi
     insns, the register allocator sometimes uses floating point
     or vector registers for the pseudo.  Since ctrsi/ctrdi is a
     jump insn and output reloads are not implemented for jumps,
     the ctrsi/ctrdi splitters need to handle all possible cases.
     That's a pain, and it gets to be seriously difficult when a
     splitter that runs after reload needs memory to transfer from
     a gpr to fpr.  See PR70098 and PR71763 which are not fixed
     for the difficult case.  It's better to not create problems
     in the first place.  */
  if (icode != CODE_FOR_nothing
      && (icode == CODE_FOR_bdz_si
	  || icode == CODE_FOR_bdz_di
	  || icode == CODE_FOR_bdnz_si
	  || icode == CODE_FOR_bdnz_di
	  || icode == CODE_FOR_bdztf_si
	  || icode == CODE_FOR_bdztf_di
	  || icode == CODE_FOR_bdnztf_si
	  || icode == CODE_FOR_bdnztf_di))
    return false;

  return true;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
static rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}
/* AIX Thread-Local Address support.  */

static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
  const char *name;
  char *tlsname;

  name = XSTR (addr, 0);
  /* Append TLS CSECT qualifier, unless the symbol already is qualified
     or the symbol will be in TLS private data section.  */
  if (name[strlen (name) - 1] != ']'
      && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
	  || bss_initializer_p (SYMBOL_REF_DECL (addr))))
    {
      /* Room for NAME, the four-character "[UL]"/"[TL]" suffix and the
	 terminating NUL.  */
      tlsname = XALLOCAVEC (char, strlen (name) + 5);
      strcpy (tlsname, name);
      strcat (tlsname,
	      bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
      tlsaddr = copy_rtx (addr);
      XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
    }
  else
    tlsaddr = addr;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)),
					  Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  The "*LCM" prefix
	 replaces the first three characters of NAME, so the new name is
	 one character longer (plus the terminating NUL).  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 2);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    tlsreg = gen_rtx_REG (DImode, 13);

  /* Load the TOC value into temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}
/* Passes the tls arg value for global dynamic and local dynamic
   emit_library_call_value in rs6000_legitimize_tls_address to
   rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
   marker relocs put on __tls_get_addr calls.  */
static rtx global_tlsarg;

/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
{
  rtx dest, insn;

  if (TARGET_XCOFF)
    return rs6000_legitimize_tls_address_aix (addr, model);

  dest = gen_reg_rtx (Pmode);
  if (model == TLS_MODEL_LOCAL_EXEC
      && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
    {
      rtx tlsreg;

      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
	}
      emit_insn (insn);
    }
  else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
    {
      rtx tlsreg, tmp;

      tmp = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
	}
      emit_insn (insn);
      if (TARGET_64BIT)
	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
      else
	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
      emit_insn (insn);
    }
  else
    {
      rtx got, tga, tmp1, tmp2;

      /* We currently use relocations like @got@tlsgd for tls, which
	 means the linker will handle allocation of tls entries, placing
	 them in the .got section.  So use a pointer to the .got section,
	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
	 or to secondary GOT sections used by 32-bit -fPIC.  */
      if (rs6000_pcrel_p ())
	got = const0_rtx;
      else if (TARGET_64BIT)
	got = gen_rtx_REG (Pmode, 2);
      else
	{
	  if (flag_pic == 1)
	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  else
	    {
	      rtx gsym = rs6000_got_sym ();
	      got = gen_reg_rtx (Pmode);
	      if (flag_pic == 0)
		rs6000_emit_move (got, gsym, Pmode);
	      else
		{
		  rtx mem, lab;

		  tmp1 = gen_reg_rtx (Pmode);
		  tmp2 = gen_reg_rtx (Pmode);
		  mem = gen_const_mem (Pmode, tmp1);
		  lab = gen_label_rtx ();
		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
		  if (TARGET_LINK_STACK)
		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
		  emit_move_insn (tmp2, mem);
		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
		  set_unique_reg_note (last, REG_EQUAL, gsym);
		}
	    }
	}

      if (model == TLS_MODEL_GLOBAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
				    UNSPEC_TLSGD);
	  tga = rs6000_tls_get_addr ();
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
	}
      else if (model == TLS_MODEL_LOCAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
	  tga = rs6000_tls_get_addr ();
	  tmp1 = gen_reg_rtx (Pmode);
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);

	  if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
	      else
		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
	    }
	  else if (rs6000_tls_size == 32)
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
	      else
		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
	      emit_insn (insn);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
	    }
	  else
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
	      else
		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
	      emit_insn (insn);
	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
	    }
	  emit_insn (insn);
	}
      else
	{
	  /* IE, or 64-bit offset LE.  */
	  tmp2 = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
	  else
	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
	  emit_insn (insn);
	  if (rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
	    }
	  else if (TARGET_64BIT)
	    insn = gen_tls_tls_64 (dest, tmp2, addr);
	  else
	    insn = gen_tls_tls_32 (dest, tmp2, addr);
	  emit_insn (insn);
	}
    }

  return dest;
}
/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  if (GET_CODE (x) == HIGH
      && GET_CODE (XEXP (x, 0)) == UNSPEC)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Do not place an ELF TLS symbol in the constant pool.  */
  return TARGET_ELF && tls_referenced_p (x);
}
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
   that is a valid memory address for an instruction.
   The MODE argument is the machine mode for the MEM expression
   that wants to use this address.

   On the RS/6000, there are four valid addresses: a SYMBOL_REF that
   refers to a constant pool entry of an address (or the sum of it
   plus a constant), a short (16-bit signed) constant plus a register,
   the sum of two registers, or a register indirect, possibly with an
   auto-increment.  For DFmode, DDmode and DImode with a constant plus
   register, we must ensure that both words are addressable or PowerPC64
   with offset word aligned.

   For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
   32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
   because adjacent memory cells are accessed by adding word-sized offsets
   during assembly output.  */
static bool
rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
{
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_dq_form (mode);

  if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
    return 0;

  /* Handle unaligned altivec lvx/stvx type addresses.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -16)
    {
      x = XEXP (x, 0);
      return (legitimate_indirect_address_p (x, reg_ok_strict)
	      || legitimate_indexed_address_p (x, reg_ok_strict)
	      || virtual_stack_registers_memory_p (x));
    }

  if (legitimate_indirect_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE
      && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
    return 1;

  /* Handle prefixed addresses (PC-relative or 34-bit offset).  */
  if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
    return 1;

  /* Handle restricted vector d-form offsets in ISA 3.0.  */
  if (quad_offset_p)
    {
      if (quad_address_p (x, mode, reg_ok_strict))
	return 1;
    }
  else if (virtual_stack_registers_memory_p (x))
    return 1;

  else if (reg_offset_p)
    {
      if (legitimate_small_data_p (mode, x))
	return 1;
      if (legitimate_constant_pool_address_p (x, mode,
					      reg_ok_strict || lra_in_progress))
	return 1;
    }

  /* For TImode, if we have TImode in VSX registers, only allow register
     indirect addresses.  This will allow the values to go in either GPRs
     or VSX registers without reloading.  The vector types would tend to
     go into VSX registers, so we allow REG+REG, while TImode seems
     somewhat split, in that some uses are GPR based, and some VSX based.  */
  /* FIXME: We could loosen this by changing the following to
       if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
     but currently we cannot allow REG+REG addressing for TImode.  See
     PR72827 for complete details on how this ends up hoodwinking DSE.  */
  if (mode == TImode && TARGET_VSX)
    return 0;
  /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
  if (! reg_ok_strict
      && reg_offset_p
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && (XEXP (x, 0) == virtual_stack_vars_rtx
	  || XEXP (x, 0) == arg_pointer_rtx)
      && CONST_INT_P (XEXP (x, 1)))
    return 1;
  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
    return 1;
  if (!FLOAT128_2REG_P (mode)
      && (TARGET_HARD_FLOAT
	  || TARGET_POWERPC64
	  || (mode != DFmode && mode != DDmode))
      && (TARGET_POWERPC64 || mode != DImode)
      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
      && mode != PTImode
      && !avoiding_indexed_address_p (mode)
      && legitimate_indexed_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
      && mode_supports_pre_modify_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
					      reg_ok_strict, false)
	  || (!avoiding_indexed_address_p (mode)
	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* There is no prefixed version of the load/store with update.  */
      rtx addr = XEXP (x, 1);
      return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
    }
  if (reg_offset_p && !quad_offset_p
      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
    return 1;
  return 0;
}
/* Debug version of rs6000_legitimate_address_p.  */
static bool
rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
				   bool reg_ok_strict)
{
  bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
  fprintf (stderr,
	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
	   "strict = %d, reload = %s, code = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (mode),
	   reg_ok_strict,
	   (reload_completed ? "after" : "before"),
	   GET_RTX_NAME (GET_CODE (x)));
  debug_rtx (x);

  return ret;
}
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */

static bool
rs6000_mode_dependent_address_p (const_rtx addr,
				 addr_space_t as ATTRIBUTE_UNUSED)
{
  return rs6000_mode_dependent_address_ptr (addr);
}
/* Return true if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   On the RS/6000 this is true of all integral offsets (since AltiVec
   and VSX modes don't allow them) or is a pre-increment or decrement.

   ??? Except that due to conceptual problems in offsettable_address_p
   we can't really report the problems of integral offsets.  So leave
   this assuming that the adjustable offset must be valid for the
   sub-words of a TFmode operand, which is what we had before.  */

static bool
rs6000_mode_dependent_address (const_rtx addr)
{
  switch (GET_CODE (addr))
    {
    case PLUS:
      /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
	 is considered a legitimate address before reload, so there
	 are no offset restrictions in that case.  Note that this
	 condition is safe in strict mode because any address involving
	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
	 been rejected as illegitimate.  */
      if (XEXP (addr, 0) != virtual_stack_vars_rtx
	  && XEXP (addr, 0) != arg_pointer_rtx
	  && CONST_INT_P (XEXP (addr, 1)))
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
	  HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
	  if (TARGET_PREFIXED)
	    return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
	  else
	    return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
	}
      break;

    case LO_SUM:
      /* Anything in the constant pool is sufficiently aligned that
	 all bytes have the same high part address.  */
      return !legitimate_constant_pool_address_p (addr, QImode, false);

    /* Auto-increment cases are now treated generically in recog.c.  */
    case PRE_MODIFY:
      return TARGET_UPDATE;

    /* AND is only allowed in Altivec loads.  */
    case AND:
      return true;

    default:
      break;
    }

  return false;
}
/* Debug version of rs6000_mode_dependent_address.  */
static bool
rs6000_debug_mode_dependent_address (const_rtx addr)
{
  bool ret = rs6000_mode_dependent_address (addr);

  fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
	   ret ? "true" : "false");
  debug_rtx (CONST_CAST_RTX (addr));

  return ret;
}
/* Implement FIND_BASE_TERM.  */

rtx
rs6000_find_base_term (rtx op)
{
  rtx base;

  base = op;
  if (GET_CODE (base) == CONST)
    base = XEXP (base, 0);
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  if (GET_CODE (base) == UNSPEC)
    switch (XINT (base, 1))
      {
      case UNSPEC_TOCREL:
      case UNSPEC_MACHOPIC_OFFSET:
	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
	   for aliasing purposes.  */
	return XVECEXP (base, 0, 0);
      }

  return op;
}
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */

static bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
		|| GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
					     strict, worst_case);
}
/* Determine the reassociation width to be used in reassociate_bb.
   This takes into account how many parallel operations we
   can actually do of a given type, and also the latency.
   P8:
     int add/sub 6/cycle
         mul 2/cycle
     vect add/sub/mul 2/cycle
     fp   add/sub/mul 2/cycle
     dfp  1/cycle
*/

static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			    machine_mode mode)
{
  switch (rs6000_tune)
    {
    case PROCESSOR_POWER8:
    case PROCESSOR_POWER9:
    case PROCESSOR_POWER10:
      if (DECIMAL_FLOAT_MODE_P (mode))
	return 1;
      if (VECTOR_MODE_P (mode))
	return 4;
      if (INTEGRAL_MODE_P (mode))
	return 1;
      if (FLOAT_MODE_P (mode))
	return 4;
      break;
    default:
      break;
    }
  return 1;
}
/* Change register usage conditional on target flags.  */
static void
rs6000_conditional_register_usage (void)
{
  int i;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_conditional_register_usage called\n");

  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
  if (TARGET_64BIT)
    fixed_regs[13] = call_used_regs[13] = 1;

  /* Conditionally disable FPRs.  */
  if (TARGET_SOFT_FLOAT)
    for (i = 32; i < 64; i++)
      fixed_regs[i] = call_used_regs[i] = 1;

  /* The TOC register is not killed across calls in a way that is
     visible to the compiler.  */
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    call_used_regs[2] = 0;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_TOC && TARGET_MINIMAL_TOC)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (!TARGET_ALTIVEC && !TARGET_VSX)
    {
      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;
      call_used_regs[VRSAVE_REGNO] = 1;
    }

  if (TARGET_ALTIVEC || TARGET_VSX)
    global_regs[VSCR_REGNO] = 1;

  if (TARGET_ALTIVEC_ABI)
    {
      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
	call_used_regs[i] = 1;

      /* AIX reserves VR20:31 in non-extended ABI mode.  */
      if (TARGET_XCOFF)
	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
	  fixed_regs[i] = call_used_regs[i] = 1;
    }
}
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.  */

bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
  rtx temp, set;
  rtx_insn *insn;
  HOST_WIDE_INT c;

  gcc_checking_assert (CONST_INT_P (source));
  c = INTVAL (source);
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      emit_insn (gen_rtx_SET (dest, source));
      return true;

    case E_SImode:
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (copy_rtx (temp),
			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IOR (SImode, copy_rtx (temp),
					   GEN_INT (c & 0xffff))));
      break;

    case E_DImode:
      if (!TARGET_POWERPC64)
	{
	  rtx hi, lo;

	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
				      DImode);
	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
				      DImode);
	  emit_move_insn (hi, GEN_INT (c >> 32));
	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
	  emit_move_insn (lo, GEN_INT (c));
	}
      else
	rs6000_emit_set_long_const (dest, c);
      break;

    default:
      gcc_unreachable ();
    }

  insn = get_last_insn ();
  set = single_set (insn);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));

  return true;
}
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.  */

static void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
  rtx temp;
  HOST_WIDE_INT ud1, ud2, ud3, ud4;

  ud1 = c & 0xffff;
  c = c >> 16;
  ud2 = c & 0xffff;
  c = c >> 16;
  ud3 = c & 0xffff;
  c = c >> 16;
  ud4 = c & 0xffff;

  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));

  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  else if (ud3 == 0 && ud4 == 0)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      gcc_assert (ud2 & 0x8000);
      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
      emit_move_insn (dest,
		      gen_rtx_ZERO_EXTEND (DImode,
					   gen_lowpart (SImode,
							copy_rtx (temp))));
    }
  else if (ud1 == ud3 && ud2 == ud4)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
      HOST_WIDE_INT num = (ud2 << 16) | ud1;
      rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
      rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
      rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
      emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
    }
  else if ((ud4 == 0xffff && (ud3 & 0x8000))
	   || (ud4 == 0 && ! (ud3 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
      if (ud2 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2)));
      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  else
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
      if (ud3 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud3)));

      emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (32)));
      if (ud2 != 0)
	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2 << 16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
}
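
/* Worked example of the halfword decomposition used by
   rs6000_emit_set_long_const above: the constant is split into four 16-bit
   chunks ud1..ud4 (low to high), and the general case materializes the high
   32 bits, shifts them left by 32, then merges in the low chunks -- the
   moral equivalent of lis/ori/sldi/oris/ori.  A standalone check: */
#if 0 /* example only; compile separately */
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t c = 0x123456789abcdef0ULL;
  uint64_t ud1 = c & 0xffff;
  uint64_t ud2 = (c >> 16) & 0xffff;
  uint64_t ud3 = (c >> 32) & 0xffff;
  uint64_t ud4 = (c >> 48) & 0xffff;

  uint64_t r = (ud4 << 16) | ud3;	/* lis + ori */
  r <<= 32;				/* sldi 32 */
  r |= (ud2 << 16) | ud1;		/* oris + ori */
  assert (r == c);
  return 0;
}
#endif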
/* Helper for the following.  Get rid of [r+r] memory refs
   in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */

static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
{
  if (MEM_P (operands[0])
      && !REG_P (XEXP (operands[0], 0))
      && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
					       GET_MODE (operands[0]), false))
    operands[0]
      = replace_equiv_address (operands[0],
			       copy_addr_to_reg (XEXP (operands[0], 0)));

  if (MEM_P (operands[1])
      && !REG_P (XEXP (operands[1], 0))
      && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
					       GET_MODE (operands[1]), false))
    operands[1]
      = replace_equiv_address (operands[1],
			       copy_addr_to_reg (XEXP (operands[1], 0)));
}

/* Generate a vector of constants to permute MODE for a little-endian
   storage operation by swapping the two halves of a vector.  */
static rtvec
rs6000_const_vec (machine_mode mode)
{
  int i, subparts;
  rtvec v;

  switch (mode)
    {
    case E_V1TImode:
      subparts = 1;
      break;
    case E_V2DFmode:
    case E_V2DImode:
      subparts = 2;
      break;
    case E_V4SFmode:
    case E_V4SImode:
      subparts = 4;
      break;
    case E_V8HImode:
      subparts = 8;
      break;
    case E_V16QImode:
      subparts = 16;
      break;
    default:
      gcc_unreachable ();
    }

  v = rtvec_alloc (subparts);

  for (i = 0; i < subparts / 2; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
  for (i = subparts / 2; i < subparts; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);

  return v;
}

/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
   store operation.  */
void
rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
  /* Scalar permutations are easier to express in integer modes rather than
     floating-point modes, so cast them here.  We use V1TImode instead
     of TImode to ensure that the values don't go through GPRs.  */
  if (FLOAT128_VECTOR_P (mode))
    {
      dest = gen_lowpart (V1TImode, dest);
      source = gen_lowpart (V1TImode, source);
      mode = V1TImode;
    }

  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
     scalar.  */
  if (mode == TImode || mode == V1TImode)
    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
						  GEN_INT (64))));
  else
    {
      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
    }
}

/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insns that represent an lxvd2x and xxpermdi.  */
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = gen_lowpart (V2DImode, dest);
      source = adjust_address (source, V2DImode, 0);
    }

  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}

/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insns that represent an xxpermdi and an stxvd2x.  */
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
  /* This should never be called after LRA.  */
  gcc_assert (can_create_pseudo_p ());

  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = adjust_address (dest, V2DImode, 0);
      source = gen_lowpart (V2DImode, source);
    }

  rtx tmp = gen_reg_rtx_and_attrs (source);
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}

/* Emit a sequence representing a little-endian VSX load or store,
   moving data from SOURCE to DEST in mode MODE.  This is done
   separately from rs6000_emit_move to ensure it is called only
   during expand.  LE VSX loads and stores introduced later are
   handled with a split.  The expand-time RTL generation allows
   us to optimize away redundant pairs of register-permutes.  */
void
rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
  gcc_assert (!BYTES_BIG_ENDIAN
	      && VECTOR_MEM_VSX_P (mode)
	      && !TARGET_P9_VECTOR
	      && !gpr_or_gpr_p (dest, source)
	      && (MEM_P (source) ^ MEM_P (dest)));

  if (MEM_P (source))
    {
      gcc_assert (REG_P (dest) || SUBREG_P (dest));
      rs6000_emit_le_vsx_load (dest, source, mode);
    }
  else
    {
      if (!REG_P (source))
	source = force_reg (mode, source);
      rs6000_emit_le_vsx_store (dest, source, mode);
    }
}

/* Return whether a SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:

	(SUBREG:SF (REG:SI ...))
	(SUBREG:SI (REG:SF ...))

   and one of the values is in a floating point/vector register, where SFmode
   scalars are stored in DFmode format.  */

bool
valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
{
  if (TARGET_ALLOW_SF_SUBREG)
    return true;

  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
    return true;

  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
    return true;

  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
  if (SUBREG_P (dest))
    {
      rtx dest_subreg = SUBREG_REG (dest);
      rtx src_subreg = SUBREG_REG (src);
      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
    }

  return false;
}

/* Helper function to change moves with:

	(SUBREG:SF (REG:SI)) and
	(SUBREG:SI (REG:SF))

   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
   values are stored as DFmode values in the VSX registers.  We need to convert
   the bits before we can use a direct move or operate on the bits in the
   vector register as an integer type.

   Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)).  */

static bool
rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
{
  if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
      && SUBREG_P (source) && sf_subreg_operand (source, mode))
    {
      rtx inner_source = SUBREG_REG (source);
      machine_mode inner_mode = GET_MODE (inner_source);

      if (mode == SImode && inner_mode == SFmode)
	{
	  emit_insn (gen_movsi_from_sf (dest, inner_source));
	  return true;
	}

      if (mode == SFmode && inner_mode == SImode)
	{
	  emit_insn (gen_movsf_from_si (dest, inner_source));
	  return true;
	}
    }

  return false;
}

/* Emit a move from SOURCE to DEST in mode MODE.  */
void
rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
{
  rtx operands[2];
  operands[0] = dest;
  operands[1] = source;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
	       GET_MODE_NAME (mode),
	       lra_in_progress,
	       reload_completed,
	       can_create_pseudo_p ());
      debug_rtx (dest);
      fprintf (stderr, "source:\n");
      debug_rtx (source);
    }

  /* Check that we get CONST_WIDE_INT only when we should.  */
  if (CONST_WIDE_INT_P (operands[1])
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    gcc_unreachable ();

#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* If we use a long double type, set the flags in .gnu_attribute that say
     what the long double type is.  This is to allow the linker's warning
     message for the wrong long double to be useful, even if the function does
     not do a call (for example, doing a 128-bit add on power9 if the long
     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
     are used if they aren't the default long double type.  */
  if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
    {
      if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
	rs6000_passes_float = rs6000_passes_long_double = true;

      else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
	rs6000_passes_float = rs6000_passes_long_double = true;
    }
#endif

  /* See if we need to special case SImode/SFmode SUBREG moves.  */
  if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
      && rs6000_emit_move_si_sf_subreg (dest, source, mode))
    return;

  /* Check if GCC is setting up a block move that will end up using FP
     registers as temporaries.  We must make sure this is acceptable.  */
  if (MEM_P (operands[0])
      && MEM_P (operands[1])
      && mode == DImode
      && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
      && ! (rs6000_slow_unaligned_access (SImode,
					  (MEM_ALIGN (operands[0]) > 32
					   ? 32 : MEM_ALIGN (operands[0])))
	    || rs6000_slow_unaligned_access (SImode,
					     (MEM_ALIGN (operands[1]) > 32
					      ? 32 : MEM_ALIGN (operands[1]))))
      && ! MEM_VOLATILE_P (operands[0])
      && ! MEM_VOLATILE_P (operands[1]))
    {
      emit_move_insn (adjust_address (operands[0], SImode, 0),
		      adjust_address (operands[1], SImode, 0));
      emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
		      adjust_address (copy_rtx (operands[1]), SImode, 4));
      return;
    }

  if (can_create_pseudo_p () && MEM_P (operands[0])
      && !gpc_reg_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);

  /* Recognize the case where operand[1] is a reference to thread-local
     data and load its address to a register.  */
  if (tls_referenced_p (operands[1]))
    {
      enum tls_model model;
      rtx tmp = operands[1];
      rtx addend = NULL;

      if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (tmp, 0), 1);
	  tmp = XEXP (XEXP (tmp, 0), 0);
	}

      gcc_assert (SYMBOL_REF_P (tmp));
      model = SYMBOL_REF_TLS_MODEL (tmp);
      gcc_assert (model != 0);

      tmp = rs6000_legitimize_tls_address (tmp, model);
      if (addend)
	{
	  tmp = gen_rtx_PLUS (mode, tmp, addend);
	  tmp = force_operand (tmp, operands[0]);
	}
      operands[1] = tmp;
    }

  /* 128-bit constant floating-point values on Darwin should really be loaded
     as two parts.  However, this premature splitting is a problem when DFmode
     values can go into Altivec registers.  */
  if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
      && !reg_addr[DFmode].scalar_in_vmx_p)
    {
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
			simplify_gen_subreg (DFmode, operands[1], mode, 0),
			DFmode);
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
					     GET_MODE_SIZE (DFmode)),
			simplify_gen_subreg (DFmode, operands[1], mode,
					     GET_MODE_SIZE (DFmode)),
			DFmode);
      return;
    }

  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
     p1:SD) if p1 is not of floating point class and p0 is spilled as
     we can have no analogous movsd_store for this.  */
  if (lra_in_progress && mode == DDmode
10175 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10176 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10177 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10178 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10181 int regno
= REGNO (SUBREG_REG (operands
[1]));
10183 if (!HARD_REGISTER_NUM_P (regno
))
10185 cl
= reg_preferred_class (regno
);
10186 regno
= reg_renumber
[regno
];
10188 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10190 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10193 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10194 operands
[1] = SUBREG_REG (operands
[1]);
10197 if (lra_in_progress
10199 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10200 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10201 && (REG_P (operands
[1])
10202 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10204 int regno
= reg_or_subregno (operands
[1]);
10207 if (!HARD_REGISTER_NUM_P (regno
))
10209 cl
= reg_preferred_class (regno
);
10210 gcc_assert (cl
!= NO_REGS
);
10211 regno
= reg_renumber
[regno
];
10213 regno
= ira_class_hard_regs
[cl
][0];
10215 if (FP_REGNO_P (regno
))
10217 if (GET_MODE (operands
[0]) != DDmode
)
10218 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10219 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10221 else if (INT_REGNO_P (regno
))
10222 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10227 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10228 p:DD)) if p0 is not of floating point class and p1 is spilled as
10229 we can have no analogous movsd_load for this. */
10230 if (lra_in_progress
&& mode
== DDmode
10231 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10232 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10233 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10234 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10237 int regno
= REGNO (SUBREG_REG (operands
[0]));
10239 if (!HARD_REGISTER_NUM_P (regno
))
10241 cl
= reg_preferred_class (regno
);
10242 regno
= reg_renumber
[regno
];
10244 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10246 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10249 operands
[0] = SUBREG_REG (operands
[0]);
10250 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10253 if (lra_in_progress
10255 && (REG_P (operands
[0])
10256 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10257 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10258 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10260 int regno
= reg_or_subregno (operands
[0]);
10263 if (!HARD_REGISTER_NUM_P (regno
))
10265 cl
= reg_preferred_class (regno
);
10266 gcc_assert (cl
!= NO_REGS
);
10267 regno
= reg_renumber
[regno
];
10269 regno
= ira_class_hard_regs
[cl
][0];
10271 if (FP_REGNO_P (regno
))
10273 if (GET_MODE (operands
[1]) != DDmode
)
10274 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10275 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10277 else if (INT_REGNO_P (regno
))
10278 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10284 /* FIXME: In the long term, this switch statement should go away
10285 and be replaced by a sequence of tests based on things like
10291 if (CONSTANT_P (operands
[1])
10292 && !CONST_INT_P (operands
[1]))
10293 operands
[1] = force_const_mem (mode
, operands
[1]);
10300 if (FLOAT128_2REG_P (mode
))
10301 rs6000_eliminate_indexed_memrefs (operands
);
10308 if (CONSTANT_P (operands
[1])
10309 && ! easy_fp_constant (operands
[1], mode
))
10310 operands
[1] = force_const_mem (mode
, operands
[1]);
10320 if (CONSTANT_P (operands
[1])
10321 && !easy_vector_constant (operands
[1], mode
))
10322 operands
[1] = force_const_mem (mode
, operands
[1]);
10327 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
10328 error ("%qs is an opaque type, and you can't set it to other values.",
10329 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
10334 /* Use default pattern for address of ELF small data */
10337 && DEFAULT_ABI
== ABI_V4
10338 && (SYMBOL_REF_P (operands
[1])
10339 || GET_CODE (operands
[1]) == CONST
)
10340 && small_data_operand (operands
[1], mode
))
10342 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10346 /* Use the default pattern for loading up PC-relative addresses. */
10347 if (TARGET_PCREL
&& mode
== Pmode
10348 && pcrel_local_or_external_address (operands
[1], Pmode
))
10350 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10354 if (DEFAULT_ABI
== ABI_V4
10355 && mode
== Pmode
&& mode
== SImode
10356 && flag_pic
== 1 && got_operand (operands
[1], mode
))
10358 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
10362 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
10363 && TARGET_NO_TOC_OR_PCREL
10366 && CONSTANT_P (operands
[1])
10367 && GET_CODE (operands
[1]) != HIGH
10368 && !CONST_INT_P (operands
[1]))
10370 rtx target
= (!can_create_pseudo_p ()
10372 : gen_reg_rtx (mode
));
10374 /* If this is a function address on -mcall-aixdesc,
10375 convert it to the address of the descriptor. */
10376 if (DEFAULT_ABI
== ABI_AIX
10377 && SYMBOL_REF_P (operands
[1])
10378 && XSTR (operands
[1], 0)[0] == '.')
10380 const char *name
= XSTR (operands
[1], 0);
10382 while (*name
== '.')
10384 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
10385 CONSTANT_POOL_ADDRESS_P (new_ref
)
10386 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
10387 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
10388 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
10389 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
10390 operands
[1] = new_ref
;
10393 if (DEFAULT_ABI
== ABI_DARWIN
)
10396 /* This is not PIC code, but could require the subset of
10397 indirections used by mdynamic-no-pic. */
10398 if (MACHO_DYNAMIC_NO_PIC_P
)
10400 /* Take care of any required data indirection. */
10401 operands
[1] = rs6000_machopic_legitimize_pic_address (
10402 operands
[1], mode
, operands
[0]);
10403 if (operands
[0] != operands
[1])
10404 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10408 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
10409 emit_insn (gen_macho_low (Pmode
, operands
[0],
10410 target
, operands
[1]));
10414 emit_insn (gen_elf_high (target
, operands
[1]));
10415 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
10419 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10420 and we have put it in the TOC, we just need to make a TOC-relative
10421 reference to it. */
10423 && SYMBOL_REF_P (operands
[1])
10424 && use_toc_relative_ref (operands
[1], mode
))
10425 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
10426 else if (mode
== Pmode
10427 && CONSTANT_P (operands
[1])
10428 && GET_CODE (operands
[1]) != HIGH
10429 && ((REG_P (operands
[0])
10430 && FP_REGNO_P (REGNO (operands
[0])))
10431 || !CONST_INT_P (operands
[1])
10432 || (num_insns_constant (operands
[1], mode
)
10433 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
10434 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
10435 && (TARGET_CMODEL
== CMODEL_SMALL
10436 || can_create_pseudo_p ()
10437 || (REG_P (operands
[0])
10438 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
10442 /* Darwin uses a special PIC legitimizer. */
10443 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
10446 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
10448 if (operands
[0] != operands
[1])
10449 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10454 /* If we are to limit the number of things we put in the TOC and
10455 this is a symbol plus a constant we can add in one insn,
10456 just put the symbol in the TOC and add the constant. */
10457 if (GET_CODE (operands
[1]) == CONST
10458 && TARGET_NO_SUM_IN_TOC
10459 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
10460 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
10461 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
10462 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
10463 && ! side_effects_p (operands
[0]))
10466 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
10467 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
10469 sym
= force_reg (mode
, sym
);
10470 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
10474 operands
[1] = force_const_mem (mode
, operands
[1]);
10477 && SYMBOL_REF_P (XEXP (operands
[1], 0))
10478 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
10480 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
10482 operands
[1] = gen_const_mem (mode
, tocref
);
10483 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
10489 if (!VECTOR_MEM_VSX_P (TImode
))
10490 rs6000_eliminate_indexed_memrefs (operands
);
10494 rs6000_eliminate_indexed_memrefs (operands
);
10498 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
10501 /* Above, we may have called force_const_mem which may have returned
10502 an invalid address. If we can, fix this up; otherwise, reload will
10503 have to deal with it. */
10504 if (MEM_P (operands
[1]))
10505 operands
[1] = validize_mem (operands
[1]);
10507 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
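
/* The mov<mode> expanders in rs6000.md are the main callers of the function
   above; condensed here for illustration (not the literal pattern):

	(define_expand "movdi"
	  [(set (match_operand:DI 0 "general_operand")
		(match_operand:DI 1 "any_operand"))]
	  ""
	  "{ rs6000_emit_move (operands[0], operands[1], DImode); DONE; }")  */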

/* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */
static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      if (!TARGET_HARD_FLOAT)
	{
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}

/* Create a decl for either complex long double multiply or complex long double
   divide when long double is IEEE 128-bit floating point.  We can't use
   __multc3 and __divtc3 because the original long double using IBM extended
   double used those names.  The complex multiply/divide functions are encoded
   as builtin functions with a complex result and 4 scalar inputs.  */

static void
create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
{
  tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
				      name, NULL_TREE);

  set_builtin_decl (fncode, fndecl, true);

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
}
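
/* For example (illustrative): when long double is IEEE 128-bit, the decls
   created this way correspond to prototypes of the form

	_Complex long double __mulkc3 (long double, long double,
				       long double, long double);

   where the four scalar arguments are the real and imaginary parts of the
   two complex operands.  */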

/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
   continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      static bool complex_muldiv_init_p = false;

      /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble.  If
	 we have clone or target attributes, this will be called a second
	 time.  We want to create the built-in function only once.  */
      if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
	{
	  complex_muldiv_init_p = true;
	  built_in_function fncode_mul =
	    (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
				 - MIN_MODE_COMPLEX_FLOAT);
	  built_in_function fncode_div =
	    (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
				 - MIN_MODE_COMPLEX_FLOAT);

	  tree fntype = build_function_type_list (complex_long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  NULL_TREE);

	  create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
	  create_complex_muldiv ("__divkc3", fncode_div, fntype);
	}

      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abskf2");
      set_optab_libfunc (powi_optab, mode, "__powikf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");

      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");

      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
	}
    }

  else
    {
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}

static void
rs6000_init_libfuncs (void)
{
  /* __float128 support.  */
  if (TARGET_FLOAT128_TYPE)
    {
      init_float128_ibm (IFmode);
      init_float128_ieee (KFmode);
    }

  /* AIX/Darwin/64-bit Linux quad floating point routines.  */
  if (TARGET_LONG_DOUBLE_128)
    {
      if (!TARGET_IEEEQUAD)
	init_float128_ibm (TFmode);

      /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
      else
	init_float128_ieee (TFmode);
    }
}

/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */

void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
      emit_move_insn (dst, src);
      return;
    }

  if (cc_reg_not_cr0_operand (ccreg, CCmode))
    {
      emit_move_insn (dst, src);
      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
      return;
    }

  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
  if (dot == 1)
    {
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
    }
  else
    {
      rtx set = gen_rtx_SET (dst, src);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
    }
}
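
/* Example RTL (illustrative commentary; register numbers arbitrary): with
   DOT == 1 and CCREG being CR0, an "and." style record-form insn comes out
   as

	(parallel [(set (reg:CC 100)
			(compare:CC (and:DI (reg:DI 3) (reg:DI 4))
				    (const_int 0)))
		   (clobber (reg:DI 5))])

   With DOT == 2 the clobber is instead a real set of the destination, and
   with a CC register other than CR0 a separate move and compare are
   emitted.  */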

/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
	      && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
	      || mode != CCUNSmode);

  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
	      || mode == CCUNSmode);

  gcc_assert (mode == CCFPmode
	      || (code != ORDERED && code != UNORDERED
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT
		  && code != UNGE && code != UNLE));

  /* These are invalid; the information is not there.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}

/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */

bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;
    }

  nb--;

  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
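
/* Worked example (illustrative): MASK == 0x0000ff00 in SImode has its single
   stretch of ones in bits 8..15 (counted from the right), so this returns
   true with *E == 8 and *B == 15.  The wrap-around DImode mask
   0xff000000000000ff (ones in bits 56..63 wrapping around to bits 0..7) is
   also accepted, with *E == 56 and *B == 7; such wrap-around masks are
   usable only by the rotate forms.  */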

/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
   or rldicr instruction, to implement an AND with it in mode MODE.  */

bool
rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
     does not wrap.  */
  if (mode == DImode)
    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));

  /* For SImode, rlwinm can do everything.  */
  if (mode == SImode)
    return (nb < 32 && ne < 32);

  return false;
}
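
/* For instance (illustrative): the DImode mask 0x00000000ffff0000
   (*B == 31, *E == 16) passes the nb < 32 && ne <= nb test and can be done
   with a single rlwinm, while 0x0000ffff00000000 (*B == 47, *E == 32) fails
   all three tests and needs more than one AND-with-mask instruction.  */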

/* Return the instruction template for an AND with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldicl. %0,%1,0,%3";
      return "rldicl %0,%1,0,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rldicr. %0,%1,0,%3";
      return "rldicr %0,%1,0,%3";
    }

  if (nb < 32 && ne < 32)
    {
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwinm. %0,%1,0,%3,%4";
      return "rlwinm %0,%1,0,%3,%4";
    }

  gcc_unreachable ();
}
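
/* Example (illustrative): an AND of a DImode register with 0xff has NE == 0
   and NB == 7, so the first case above fires and produces
   "rldicl %0,%1,0,56", i.e. rotate by zero and clear the leftmost 56
   bits.  */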

/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}

/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}

/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}

/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}

/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insn;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
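
/* Worked example (illustrative): C == 0x00ff0ff0 is not a single stretch of
   ones.  Here bit1 == 0x10 (the lowest set bit), bit2 == 0x1000 (the start
   of the lowest hole), val1 == 0x00ff0000, bit3 == 0x10000 (the end of that
   hole), and val + bit3 - bit2 == 0x00fffff0, which fills the hole and is a
   valid single-stretch mask, so this AND can be done with two insns.  */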

/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
	{
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
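
/* Continuing the worked example above (illustrative; register numbers
   arbitrary): an SImode AND with 0x00ff0ff0 splits into mask1 == 0xffff0fff
   and mask2 == 0x00fffff0, which assemble to

	rlwinm 3,4,0,20,15
	rlwinm 3,3,0,8,27

   where the first rlwinm uses a wrap-around mask to clear the hole at bits
   12..15 and the second clears the remaining bits; with DOT nonzero the
   second insn becomes the record form "rlwinm.".  */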

/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg1) || !REG_P (reg2))
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}

/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */

int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr1, 0)))
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (!CONST_INT_P (XEXP (addr1, 1)))
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (!REG_P (addr1))
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr2, 0)))
	return 0;
      else
	{
	  reg2 = REGNO (XEXP (addr2, 0));
	  /* The offset must be constant.  */
	  if (!CONST_INT_P (XEXP (addr2, 1)))
	    return 0;
	  offset2 = INTVAL (XEXP (addr2, 1));
	}
    }
  else if (!REG_P (addr2))
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     instructions.  */
  return 1;
}
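
/* For example (illustrative commentary; register number arbitrary), the pair

	(mem:DF (plus (reg 9) (const_int 16)))
	(mem:DF (plus (reg 9) (const_int 24)))

   shares base register 9 with offsets 16 and 24 == 16 + 8, so it qualifies;
   different base registers or any other offset difference do not.  */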

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
   need to use DDmode, in all other cases we can use the same mode.  */
static machine_mode
rs6000_secondary_memory_needed_mode (machine_mode mode)
{
  if (lra_in_progress && mode == SDmode)
    return DDmode;
  return mode;
}

/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!lra_in_progress && !reload_completed)
	return PSEUDO_REG_TYPE;

      regno = true_regnum (reg);
      if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
	return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}

/* Helper function to return the cost of adding a TOC entry address.  */

static inline int
rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
{
  int ret;

  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;

  else
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;

  return ret;
}

/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are needed, and positive to give the extra cost for the
   memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
				machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;
  const char *fail_msg = NULL;

  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr,
		   "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		   "not valid in class\n",
		   GET_MODE_NAME (mode), reg_class_names[rclass]);
	  debug_rtx (addr);
	}

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
	 don't need a scratch register, since the powerpc only supports
	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
	{
	  fail_msg = "no base register #1";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	{
	  fail_msg = "bad PRE_MODIFY";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || !CONST_INT_P (XEXP (addr, 1))
	  || INTVAL (XEXP (addr, 1)) != -16)
	{
	  fail_msg = "bad Altivec AND #1";
	  extra_cost = -1;
	}

      if (rclass != ALTIVEC_REGS)
	{
	  if (legitimate_indirect_address_p (and_arg, false))
	    extra_cost = 1;

	  else if (legitimate_indexed_address_p (and_arg, false))
	    extra_cost = 2;

	  else
	    {
	      fail_msg = "bad Altivec AND #2";
	      extra_cost = -1;
	    }

	  type = "and";
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
	{
	  extra_cost = 1;
	  type = "move";
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset";
	    }
	}

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      extra_cost = 1;
	      type = "indexed #2";
	    }
	}

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	{
	  fail_msg = "no base register #2";
	  extra_cost = -1;
	}

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))
	    {
	      extra_cost = 1;
	      type = "indexed";
	    }
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	{
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	    {
	      extra_cost = 1;
	      type = "vector d-form offset";
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset #2";
	    }
	}

      else
	{
	  fail_msg = "bad PLUS";
	  extra_cost = -1;
	}

      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum";
	}

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	{
	  fail_msg = "bad LO_SUM";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "lo_sum";
	}
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #2";
	}

      else
	{
	  type = "address";
	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
	}
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	{
	  fail_msg = "bad UNSPEC";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #3";
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "toc reference";
	}
      break;

    default:
      {
	fail_msg = "bad address";
	extra_cost = -1;
      }
    }

  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 extra_cost,
		 (type) ? type : "<none>");

      debug_rtx (addr);
    }

  return extra_cost;
}

/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
	{
	  /* ISA 2.07: MTVSRD or MFVSRD.  */
	  if (size == 8)
	    return true;

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
	{
	  if (mode == SImode)
	    return true;

	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
	return true;
    }

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}
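
/* Example (illustrative): on a 64-bit ISA 2.07 target, a DImode move between
   a GPR and a VSX register is "simple" (a single mtvsrd/mfvsrd suffices), so
   the function above returns true; an SFmode move is not, because the value
   must additionally be converted between the DFmode register format and the
   SFmode memory layout, so it falls through to the direct-move helper
   below.  */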

/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register, return the
   insn code of the helper function if there is such a function or
   CODE_FOR_nothing if not.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access
	 to the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}

/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      machine_mode mode,
			      secondary_reload_info *sri,
			      bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* Moves to the same set of registers is a simple move for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
	{
	  sri->icode = CODE_FOR_nothing;
	  sri->extra_cost = 0;
	}
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
					      altivec_p);
}

/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && SUBREG_P (x)
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  icode = ((in_p)
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || CONST_DOUBLE_P (x)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
      debug_rtx (x);
    }

  return ret;
}
/* Better tracing for rs6000_secondary_reload_inner.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
			       bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
      break;
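      /* For example, when the class has no update form, a reload of
	 (mem:V4SI (pre_dec:DI (reg 9))) is handled above by emitting an
	 explicit r9 = r9 - 16 and then using a plain (mem:V4SI (reg 9)).  */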
    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  new_addr = op0;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || SUBREG_P (op0))
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));
}
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));
}
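/* For instance, a multi-word access through (plus:SI (reg 9) (const_int
   0x7ffc)) whose second word would overflow the 16-bit displacement is
   rewritten here by computing the sum into the scratch register and
   accessing (mem (reg scratch)) instead.  */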
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* For the vector pair and vector quad modes, prefer their natural register
     (VSX or FPR) rather than GPR registers.  For other integer types, prefer
     the GPR registers.  */
  if (rclass == GEN_OR_FLOAT_REGS)
    {
      if (mode == OOmode)
	return VSX_REGS;

      if (mode == XOmode)
	return FLOAT_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;
    }

  return rclass;
}
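/* As an example of the constant handling above: (const_int -1) stays in
   the requested VSX class on ISA 2.07 (xxlorc can form it in any VSX
   register), is steered to the Altivec registers on ISA 2.06, and a
   constant with no cheap splat sequence gets NO_REGS so that it is
   pushed out to memory.  */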
/* Debug version of rs6000_preferred_reload_class.  */
static enum reg_class
rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
{
  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);

  fprintf (stderr,
	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
	   "mode = %s, x:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (GET_MODE (x)));
  debug_rtx (x);

  return ret;
}
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   move to/from fpr to gpr instructions are available.  Also, under VSX, you
   can copy vector registers from the FP register set to the Altivec register
   set and vice versa.  */

static bool
rs6000_secondary_memory_needed (machine_mode mode,
				reg_class_t from_class,
				reg_class_t to_class)
{
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
		    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory  */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  if (rs6000_secondary_reload_move (to_type, from_type, mode,
				    (secondary_reload_info *)0, altivec_p))
    return false;

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
    return true;

  return false;
}
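/* E.g. a DImode copy between a GPR and an FPR traditionally had to bounce
   through a stack slot; with -mcpu=power8 the direct move instructions
   (mtvsrd/mfvsrd) let rs6000_secondary_reload_move handle it, so no
   secondary memory is reported.  */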
/* Debug version of rs6000_secondary_memory_needed.  */
static bool
rs6000_debug_secondary_memory_needed (machine_mode mode,
				      reg_class_t from_class,
				      reg_class_t to_class)
{
  bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);

  fprintf (stderr,
	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
	   "to_class = %s, mode = %s\n",
	   ret ? "true" : "false",
	   reg_class_names[from_class],
	   reg_class_names[to_class],
	   GET_MODE_NAME (mode));

  return ret;
}
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (SYMBOL_REF_P (in)
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  if (REG_P (in))
    {
      regno = REGNO (in);
      if (!HARD_REGISTER_NUM_P (regno))
	{
	  regno = true_regnum (in);
	  if (!HARD_REGISTER_NUM_P (regno))
	    regno = -1;
	}
    }
  else if (SUBREG_P (in))
    {
      regno = true_regnum (in);
      if (!HARD_REGISTER_NUM_P (regno))
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}
/* Debug version of rs6000_secondary_reload_class.  */
static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class rclass,
				     machine_mode mode, rtx in)
{
  enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
  fprintf (stderr,
	   "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
	   "mode = %s, input rtx:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (mode));
  debug_rtx (in);

  return ret;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
rs6000_can_change_mode_class (machine_mode from,
			      machine_mode to,
			      reg_class_t rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return true;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return false;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return false;

	  /* Allow SD<->DD changes, since SDmode values are stored in
	     the low half of the DDmode, just like target-independent
	     code expects.  We need to allow at least SD->DD since
	     rs6000_secondary_memory_needed_mode asks for that change
	     to be made for SD reloads.  */
	  if ((to == DDmode && from == SDmode)
	      || (to == SDmode && from == DDmode))
	    return true;

	  if (from_size < 8 || to_size < 8)
	    return false;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return false;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return false;

	  return true;
	}
    }

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
	return false;

      return (from_size == 8 || from_size == 16);
    }

  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return false;

  return true;
}
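/* Concretely, the float128 checks above reject e.g. (subreg:DI
   (reg:KF vs34) 0): the DImode scalar part of a 128-bit IEEE value lives
   in the upper 64 bits of the VSX register, not the lower 64 bits that
   subreg semantics would expect.  */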
/* Debug version of rs6000_can_change_mode_class.  */
static bool
rs6000_debug_can_change_mode_class (machine_mode from,
				    machine_mode to,
				    reg_class_t rclass)
{
  bool ret = rs6000_can_change_mode_class (from, to, rclass);

  fprintf (stderr,
	   "rs6000_can_change_mode_class, return %s, from = %s, "
	   "to = %s, rclass = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (from), GET_MODE_NAME (to),
	   reg_class_names[rclass]);

  return ret;
}
/* Return a string to do a move operation of 128 bits of data.  */

const char *
rs6000_output_move_128bit (rtx operands[])
{
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  int dest_regno;
  int src_regno;
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

  if (REG_P (dest))
    {
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;
    }
  else
    {
      dest_regno = -1;
      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
    }

  if (REG_P (src))
    {
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;
    }
  else
    {
      src_regno = -1;
      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
    }

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
    {
      if (dest_gpr_p)
	{
	  if (src_gpr_p)
	    return "#";

	  else if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
	    return "#";
	}

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (src_vsx_p)
	    return "xxlor %x0,%x1,%x1";

	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mtvsrdd %x0,%1,%L1"
		    : "mtvsrdd %x0,%L1,%1");

	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)
	return "#";
    }

  /* Loads.  */
  else if (dest_regno >= 0 && MEM_P (src))
    {
      if (dest_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "lq %0,%1";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	  else
	    return "lxvd2x %x0,%y1";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)
	return "#";
    }

  /* Stores.  */
  else if (src_regno >= 0 && MEM_P (dest))
    {
      if (src_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "stq %1,%0";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (dest, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	  else
	    return "stxvd2x %x1,%y0";
	}

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

      else if (src_fp_p)
	return "#";
    }

  /* Constants.  */
  else if (dest_regno >= 0
	   && (CONST_INT_P (src)
	       || CONST_WIDE_INT_P (src)
	       || CONST_DOUBLE_P (src)
	       || GET_CODE (src) == CONST_VECTOR))
    {
      if (dest_gpr_p)
	return "#";

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);
    }

  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
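/* This is the output routine used by the 128-bit move patterns in the
   machine description (e.g. the VSX mov<mode> insns return
   rs6000_output_move_128bit (operands) from their output template).
   A "#" result tells final the move must be handled by the
   corresponding splitter instead of a single instruction.  */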
/* Validate a 128-bit move.  */
bool
rs6000_move_128bit_ok_p (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  return (gpc_reg_operand (operands[0], mode)
	  || gpc_reg_operand (operands[1], mode));
}

/* Return true if a 128-bit move needs to be split.  */
bool
rs6000_split_128bit_ok_p (rtx operands[])
{
  if (!reload_completed)
    return false;

  if (!gpr_or_gpr_p (operands[0], operands[1]))
    return false;

  if (quad_load_store_p (operands[0], operands[1]))
    return false;

  return true;
}
/* Given a comparison operation, return the bit number in CCR to test.  We
   know this is a valid comparison.

   SCC_P is 1 if this is for an scc.  That means that %D will have been
   used instead of %C, so the bits will be in different places.

   Return -1 if OP isn't a valid comparison for some reason.  */

int
ccr_bit (rtx op, int scc_p)
{
  enum rtx_code code = GET_CODE (op);
  machine_mode cc_mode;
  int cc_regnum;
  int base_bit;
  rtx reg;

  if (!COMPARISON_P (op))
    return -1;

  reg = XEXP (op, 0);

  if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
    return -1;

  cc_mode = GET_MODE (reg);
  cc_regnum = REGNO (reg);
  base_bit = 4 * (cc_regnum - CR0_REGNO);

  validate_condition_mode (code, cc_mode);

  /* When generating a sCOND operation, only positive conditions are
     allowed.  */
  if (scc_p
      && code != EQ && code != GT && code != LT && code != UNORDERED
      && code != GTU && code != LTU)
    return -1;

  switch (code)
    {
    case NE:
      return scc_p ? base_bit + 3 : base_bit + 2;
    case EQ:
      return base_bit + 2;
    case GT:  case GTU:  case UNLE:
      return base_bit + 1;
    case LT:  case LTU:  case UNGE:
      return base_bit;
    case ORDERED:  case UNORDERED:
      return base_bit + 3;

    case GE:  case GEU:
      /* If scc, we will have done a cror to put the bit in the
	 unordered position.  So test that bit.  For integer, this is ! LT
	 unless this is an scc insn.  */
      return scc_p ? base_bit + 3 : base_bit;

    case LE:  case LEU:
      return scc_p ? base_bit + 3 : base_bit + 1;

    default:
      return -1;
    }
}
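/* Each CR field holds four bits -- LT, GT, EQ, SO/UN in that order -- so
   e.g. a GT test against CR field 1 yields bit 4 * 1 + 1 == 5 of the
   condition register.  */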
/* Return the GOT register.  */
rtx
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
{
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
}
#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)

/* Write out a function code label.  */

void
rs6000_output_function_entry (FILE *file, const char *fname)
{
  if (fname[0] != '.')
    {
      switch (DEFAULT_ABI)
	{
	default:
	  gcc_unreachable ();

	case ABI_AIX:
	  if (DOT_SYMBOLS)
	    putc ('.', file);
	  else
	    ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
	  break;

	case ABI_ELFv2:
	case ABI_V4:
	case ABI_DARWIN:
	  break;
	}
    }

  RS6000_OUTPUT_BASENAME (file, fname);
}
/* Print an operand.  Recognize special options, documented below.  */

#if TARGET_ELF
/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
   only introduced by the linker, when applying the sda21
   relocation.  */
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif

void
print_operand (FILE *file, rtx x, int code)
{
  int i;
  unsigned HOST_WIDE_INT uval;

  switch (code)
    {
      /* %a is output_address.  */

      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
	 output_operand.  */

    case 'A':
      /* Write the MMA accumulator number associated with VSX register X.  */
      if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
	output_operand_lossage ("invalid %%A value");
      else
	fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
      return;

    case 'D':
      /* Like 'J' but get to the GT bit only.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	{
	  output_operand_lossage ("invalid %%D value");
	  return;
	}

      /* Bit 1 is GT bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 1;

      /* Add one for shift count in rlinm for scc.  */
      fprintf (file, "%d", i + 1);
      return;

    case 'e':
      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid %%e value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0 && uval != 0)
	putc ('s', file);
      return;

    case 'E':
      /* X is a CR register.  Print the number of the EQ bit of the CR */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%E value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
      return;

    case 'f':
      /* X is a CR register.  Print the shift count needed to move it
	 to the high-order four bits.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%f value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'F':
      /* Similar, but print the count for the rotate in the opposite
	 direction.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%F value");
      else
	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'G':
      /* X is a constant integer.  If it is negative, print "m",
	 otherwise print "z".  This is to make an aze or ame insn.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)
	putc ('z', file);
      else
	putc ('m', file);
      return;

    case 'h':
      /* If constant, output low-order five bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
      else
	print_operand (file, x, 0);
      return;

    case 'H':
      /* If constant, output low-order six bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
      else
	print_operand (file, x, 0);
      return;

    case 'I':
      /* Print `i' if this is a constant, else nothing.  */
      if (INT_P (x))
	putc ('i', file);
      return;

    case 'j':
      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
      if (i == -1)
	output_operand_lossage ("invalid %%j code");
      else
	fprintf (file, "%d", i);
      return;

    case 'J':
      /* Similar, but add one for shift count in rlinm for scc and pass
	 scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
      if (i == -1)
	output_operand_lossage ("invalid %%J code");
      else
	/* If we want bit 31, write a shift count of zero, not 32.  */
	fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'k':
      /* X must be a constant.  Write the 1's complement of the
	 constant.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%k value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      return;

    case 'K':
      /* X must be a symbolic constant on ELF.  Write an
	 expression suitable for an 'addi' that adds in the low 16
	 bits of the MEM.  */
      if (GET_CODE (x) == CONST)
	{
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    output_operand_lossage ("invalid %%K value");
	}
      print_operand_address (file, x);
      fputs ("@l", file);
      return;

      /* %l is output_asm_label.  */

    case 'L':
      /* Write second word of DImode or DFmode reference.  Works on register
	 or non-indexed memory only.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of word.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
							   UNITS_PER_WORD),
					0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'N':  /* Unused */
      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%N value");
      else
	fprintf (file, "%d", XVECLEN (x, 0) * 4);
      return;

    case 'O':  /* Unused */
      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%O value");
      else
	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
      return;

    case 'p':
      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
      if (! INT_P (x)
	  || (i = exact_log2 (INTVAL (x))) < 0)
	output_operand_lossage ("invalid %%p value");
      else
	fprintf (file, "%d", i);
      return;

    case 'P':
      /* The operand must be an indirect memory reference.  The result
	 is the register name.  */
      if (!MEM_P (x) || !REG_P (XEXP (x, 0))
	  || REGNO (XEXP (x, 0)) >= 32)
	output_operand_lossage ("invalid %%P value");
      else
	fputs (reg_names[REGNO (XEXP (x, 0))], file);
      return;

    case 'q':
      /* This outputs the logical code corresponding to a boolean
	 expression.  The expression may have one or both operands
	 negated (if one, only the first one).  For condition register
	 logical operations, it will also treat the negated
	 CR codes as NOTs, but not handle NOTs of them.  */
      {
	const char *const *t = 0;
	const char *s;
	enum rtx_code code = GET_CODE (x);
	static const char * const tbl[3][3] = {
	  { "and", "andc", "nor" },
	  { "or", "orc", "nand" },
	  { "xor", "eqv", "xor" } };

	if (code == AND)
	  t = tbl[0];
	else if (code == IOR)
	  t = tbl[1];
	else if (code == XOR)
	  t = tbl[2];
	else
	  output_operand_lossage ("invalid %%q value");

	if (GET_CODE (XEXP (x, 0)) != NOT)
	  s = t[0];
	else
	  {
	    if (GET_CODE (XEXP (x, 1)) == NOT)
	      s = t[2];
	    else
	      s = t[1];
	  }

	fputs (s, file);
      }
      return;
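      /* For instance, %q on (and (not r3) r4) prints "andc", and on
	 (ior (not r3) (not r4)) prints "nand", per the table above.  */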
    case 'Q':
      if (! TARGET_MFCRF)
	return;
      fputc (',', file);
      /* FALLTHRU */

    case 'R':
      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%R value");
      else
	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
      return;

    case 's':
      /* Low 5 bits of 32 - value */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%s value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
      return;

    case 't':
      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	{
	  output_operand_lossage ("invalid %%t value");
	  return;
	}

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'T':
      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	x = XVECEXP (x, 0, 0);
      if (!REG_P (x) || (REGNO (x) != LR_REGNO
			 && REGNO (x) != CTR_REGNO))
	output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
	fputs ("lr", file);
      else
	fputs ("ctr", file);
      return;

    case 'u':
      /* High-order or low-order 16 bits of constant, whichever is non-zero,
	 for use in unsigned operand.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%u value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0)
	uval >>= 16;

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
      return;

    case 'v':
      /* High-order 16 bits of constant for use in signed operand.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%v value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
		 (INTVAL (x) >> 16) & 0xffff);
      return;

    case 'U':
      /* Print `u' if this has an auto-increment or auto-decrement.  */
      if (MEM_P (x)
	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
	putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("eq", file);   /* 4 */
	  break;
	case NE:
	  fputs ("ne", file);   /* 24 */
	  break;
	case LT:
	  fputs ("lt", file);   /* 16 */
	  break;
	case LE:
	  fputs ("le", file);   /* 20 */
	  break;
	case GT:
	  fputs ("gt", file);   /* 8 */
	  break;
	case GE:
	  fputs ("ge", file);   /* 12 */
	  break;
	case LTU:
	  fputs ("llt", file);  /* 2 */
	  break;
	case LEU:
	  fputs ("lle", file);  /* 6 */
	  break;
	case GTU:
	  fputs ("lgt", file);  /* 1 */
	  break;
	case GEU:
	  fputs ("lge", file);  /* 5 */
	  break;
	default:
	  output_operand_lossage ("invalid %%V value");
	}
      break;

    case 'w':
      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
      else
	print_operand (file, x, 0);
      return;

    case 'x':
      /* X is a FPR or Altivec register used in a VSX context.  */
      if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%x value");
      else
	{
	  int reg = REGNO (x);
	  int vsx_reg = (FP_REGNO_P (reg)
			 ? reg - 32
			 : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
	  if (TARGET_REGNAMES)
	    fprintf (file, "%%vs%d", vsx_reg);
	  else
#endif
	    fprintf (file, "%d", vsx_reg);
	}
      return;

    case 'X':
      if (MEM_P (x)
	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
	putc ('x', file);
      return;

    case 'Y':
      /* Like 'L', for third word of TImode/PTImode  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'z':
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	x = XVECEXP (x, 0, 1);
      /* X is a SYMBOL_REF.  Write out the name preceded by a
	 period and without any trailing data in brackets.  Used for function
	 names.  If we are configured for System V (or the embedded ABI) on
	 the PowerPC, do not emit the period, since those systems do not use
	 TOCs and the like.  */
      if (!SYMBOL_REF_P (x))
	{
	  output_operand_lossage ("invalid %%z value");
	  return;
	}

      /* For macho, check to see if we need a stub.  */
      if (TARGET_MACHO)
	{
	  const char *name = XSTR (x, 0);
#if TARGET_MACHO
	  if (darwin_symbol_stubs
	      && MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      else if (!DOT_SYMBOLS)
	assemble_name (file, XSTR (x, 0));
      else
	rs6000_output_function_entry (file, XSTR (x, 0));
      return;

    case 'Z':
      /* Like 'L', for last word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

      /* Print AltiVec memory operand.  */
    case 'y':
      {
	rtx tmp;

	gcc_assert (MEM_P (x));

	tmp = XEXP (x, 0);

	if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
	    && GET_CODE (tmp) == AND
	    && CONST_INT_P (XEXP (tmp, 1))
	    && INTVAL (XEXP (tmp, 1)) == -16)
	  tmp = XEXP (tmp, 0);
	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
		 && GET_CODE (tmp) == PRE_MODIFY)
	  tmp = XEXP (tmp, 1);
	if (REG_P (tmp))
	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
	else
	  {
	    if (GET_CODE (tmp) != PLUS
		|| !REG_P (XEXP (tmp, 0))
		|| !REG_P (XEXP (tmp, 1)))
	      {
		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
		break;
	      }

	    if (REGNO (XEXP (tmp, 0)) == 0)
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
	    else
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
	  }
	break;
      }

    case 0:
      if (REG_P (x))
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (MEM_P (x))
	{
	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
	     know the width from the mode.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	}
      else if (toc_relative_expr_p (x, false,
				    &tocrel_base_oac, &tocrel_offset_oac))
	/* This hack along with a corresponding hack in
	   rs6000_output_addr_const_extra arranges to output addends
	   where the assembler expects to find them.  eg.
	   (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
	   without this hack would be output as "x@toc+4".  We
	   want "x+4@toc".  */
	output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
	output_addr_const (file, XVECEXP (x, 0, 0));
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	output_addr_const (file, XVECEXP (x, 0, 1));
      else
	output_addr_const (file, x);
      return;

    case '&':
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
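/* As an example of the multiword modifiers above, a template such as
   "stw %1,%0\n\tstw %L1,%L0" stores both halves of a 64-bit value on a
   32-bit target: %L prints reg_names[REGNO + 1] for the register operand
   and the word-offset form of the memory operand.  */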
/* Print the address of an operand.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);

  /* Is it a PC-relative address?  */
  else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
    {
      HOST_WIDE_INT offset;

      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  offset = INTVAL (XEXP (x, 1));
	  x = XEXP (x, 0);
	}
      else
	offset = 0;

      output_addr_const (file, x);

      if (offset)
	fprintf (file, "%+" PRId64, offset);

      if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
	fprintf (file, "@got");

      fprintf (file, "@pcrel");
    }
  else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    {
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		 reg_names[SMALL_DATA_REG]);
      else
	gcc_assert (!TARGET_TOC);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && REG_P (XEXP (x, 1)))
    {
      if (REGNO (XEXP (x, 0)) == 0)
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
		 reg_names[ REGNO (XEXP (x, 0)) ]);
      else
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
		 reg_names[ REGNO (XEXP (x, 1)) ]);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && CONST_INT_P (XEXP (x, 1)))
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
#if TARGET_MACHO
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
#if TARGET_ELF
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
  else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
    {
      /* This hack along with a corresponding hack in
	 rs6000_output_addr_const_extra arranges to output addends
	 where the assembler expects to find them.  eg.
	 (lo_sum (reg 9)
	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
	 without this hack would be output as "x@toc+8@l(9)".  We
	 want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      if (GET_CODE (x) == LO_SUM)
	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
      else
	fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
    }
  else
    output_addr_const (file, x);
}
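/* E.g. (plus (reg 9) (const_int 16)) is printed as "16(9)", an indexed
   (plus (reg 9) (reg 10)) as "9,10", and on ELF (lo_sum (reg 9)
   (symbol_ref "x")) as "x@l(9)".  */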
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
	gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
			     && REG_P (XVECEXP (x, 0, 1))
			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
	output_addr_const (file, XVECEXP (x, 0, 0));
	if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
	  {
	    if (INTVAL (tocrel_offset_oac) >= 0)
	      fprintf (file, "+");
	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
	  }
	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
	  {
	    putc ('-', file);
	    assemble_name (file, toc_label_name);
	    need_toc_init = 1;
	  }
	else if (TARGET_ELF)
	  fputs ("@toc", file);
	return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
	output_addr_const (file, XVECEXP (x, 0, 0));
	putc ('-', file);
	machopic_output_function_base_name (file);
	return true;
#endif
      }
  return false;
}
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
	 the .fixup section.  Since the TOC section is already relocated, we
	 don't need to mark it here.  We used to skip the text section, but it
	 should never be valid for relocated addresses to be placed in the text
	 section.  */
      if (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && in_section != toc_section
	  && !recurse
	  && !CONST_SCALAR_INT_P (x)
	  && CONSTANT_P (x))
	{
	  char buf[256];

	  recurse = 1;
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
	  fixuplabelno++;
	  ASM_OUTPUT_LABEL (asm_out_file, buf);
	  fprintf (asm_out_file, "\t.long\t(");
	  output_addr_const (asm_out_file, x);
	  fprintf (asm_out_file, ")@fixup\n");
	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
	  fprintf (asm_out_file, "\t.long\t");
	  assemble_name (asm_out_file, buf);
	  fprintf (asm_out_file, "\n\t.previous\n");
	  recurse = 0;
	  return true;
	}
      /* Remove initial .'s to turn a -mcall-aixdesc function
	 address into the address of the descriptor, not the function
	 itself.  */
      else if (SYMBOL_REF_P (x)
	       && XSTR (x, 0)[0] == '.'
	       && DEFAULT_ABI == ABI_AIX)
	{
	  const char *name = XSTR (x, 0);
	  while (*name == '.')
	    name++;

	  fprintf (asm_out_file, "\t.long\t%s\n", name);
	  return true;
	}
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
}
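/* The fixup path above emits assembly of roughly this shape:

	.LCP0:
		.long	(sym)@fixup
		.section	".fixup","aw"
		.align	2
		.long	.LCP0
		.previous
 */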
/* Return a template string for assembly to emit when making an
   external call.  FUNOP is the call mem argument operand number.  */

static const char *
rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  char arg[12];
  arg[0] = 0;
  if (GET_CODE (operands[funop + 1]) == UNSPEC)
    {
      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
	sprintf (arg, "(%%%u@tlsgd)", funop + 1);
      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
	sprintf (arg, "(%%&@tlsld)");
    }

  /* The magic 32768 offset here corresponds to the offset of
     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
  char z[11];
  sprintf (z, "%%z%u%s", funop,
	   (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
	    ? "+32768" : ""));

  static char str[32];  /* 1 spare */
  if (rs6000_pcrel_p ())
    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     sibcall ? "" : "\n\tnop");
  else if (DEFAULT_ABI == ABI_V4)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     flag_pic ? "@plt" : "");
#if TARGET_MACHO
  /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case.  */
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The cookie is in operand func+2.  */
      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
      int cookie = INTVAL (operands[funop + 2]);
      if (cookie & CALL_LONG)
	{
	  tree funname = get_identifier (XSTR (operands[funop], 0));
	  tree labelname = get_prev_label (funname);
	  gcc_checking_assert (labelname && !sibcall);

	  /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
	     instruction will reach 'foo', otherwise link as 'bl L42'".
	     "L42" should be a 'branch island', that will do a far jump to
	     'foo'.  Branch islands are generated in
	     macho_branch_islands().  */
	  sprintf (str, "jbsr %%z%u,%.10s", funop,
		   IDENTIFIER_POINTER (labelname));
	}
      else
	/* Same as AIX or ELFv2, except to keep backwards compat, no nop
	   after the call.  */
	sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
    }
#endif
  else
    gcc_unreachable ();

  return str;
}

const char *
rs6000_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, false);
}

const char *
rs6000_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, true);
}
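/* For instance, on AIX or ELFv2 a normal call comes out as "bl %z0"
   followed by a nop (the linker's TOC-restore slot), while PC-relative
   code (rs6000_pcrel_p ()) produces "bl %z0@notoc" with no nop
   needed.  */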
/* As above, for indirect calls.  */

static const char *
rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
				 bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  Note that -Wformat-overflow will not
     currently warn here for str[], so do not rely on a warning to
     ensure str[] is correctly sized.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  /* Currently, funop is either 0 or 1.  The maximum string is always
     a !speculate 64-bit __tls_get_addr call.

     ABI_ELFv2, pcrel:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 35	.reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 36	.reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
     . 8	beq%T1l-
     .---
     .142

     ABI_AIX:
     . 9	ld 2,%3\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
     . 10	beq%T1l-\n\t
     . 10	ld 2,%4(1)
     .---
     .151

     ABI_ELFv2:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
     . 10	beq%T1l-\n\t
     . 10	ld 2,%3(1)
     .---
     .142

     ABI_V4:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 35	.reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 36	.reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
     . 8	beq%T1l-
     .---
     .142  */

  static char str[160];  /* 8 spare */
  char *s = str;
  const char *ptrload = TARGET_64BIT ? "d" : "wz";

  if (DEFAULT_ABI == ABI_AIX)
    s += sprintf (s,
		  "l%s 2,%%%u\n\t",
		  ptrload, funop + 3);

  /* We don't need the extra code to stop indirect call speculation if
     calling via LR.  */
  bool speculate = (TARGET_MACHO
		    || rs6000_speculate_indirect_jumps
		    || (REG_P (operands[funop])
			&& REGNO (operands[funop]) == LR_REGNO));

  if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
    {
      const char *rel64 = TARGET_64BIT ? "64" : "";
      char tls[29];
      tls[0] = 0;
      if (GET_CODE (operands[funop + 1]) == UNSPEC)
	{
	  if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
	    sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
		     rel64, funop + 1);
	  else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
	    sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
		     rel64);
	}

      const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
      const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
			    && flag_pic == 2 ? "+32768" : "");
      if (!speculate)
	{
	  s += sprintf (s,
			"%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
			tls, rel64, notoc, funop, addend);
	  s += sprintf (s, "crset 2\n\t");
	}
      s += sprintf (s,
		    "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
		    tls, rel64, notoc, funop, addend);
    }
  else if (!speculate)
    s += sprintf (s, "crset 2\n\t");

  if (rs6000_pcrel_p ())
    {
      if (speculate)
	sprintf (s, "b%%T%ul", funop);
      else
	sprintf (s, "beq%%T%ul-", funop);
    }
  else if (DEFAULT_ABI == ABI_AIX)
    {
      if (speculate)
	sprintf (s,
		 "b%%T%ul\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 4);
      else
	sprintf (s,
		 "beq%%T%ul-\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 4);
    }
  else if (DEFAULT_ABI == ABI_ELFv2)
    {
      if (speculate)
	sprintf (s,
		 "b%%T%ul\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 3);
      else
	sprintf (s,
		 "beq%%T%ul-\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 3);
    }
  else
    {
      if (speculate)
	sprintf (s,
		 "b%%T%u%s",
		 funop, sibcall ? "" : "l");
      else
	sprintf (s,
		 "beq%%T%u%s-%s",
		 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
    }
  return str;
}

const char *
rs6000_indirect_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, false);
}

const char *
rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, true);
}
/* Output indirect call insns.  WHICH identifies the type of sequence.  */

const char *
rs6000_pltseq_template (rtx *operands, int which)
{
  const char *rel64 = TARGET_64BIT ? "64" : "";
  char tls[30];
  tls[0] = 0;
  if (GET_CODE (operands[3]) == UNSPEC)
    {
      char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
      if (XINT (operands[3], 1) == UNSPEC_TLSGD)
	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
		 off, rel64);
      else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
		 off, rel64);
    }

  gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
  static char str[96];  /* 10 spare */
  char off = WORDS_BIG_ENDIAN ? '2' : '4';
  const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
			&& flag_pic == 2 ? "+32768" : "");
  switch (which)
    {
    case RS6000_PLTSEQ_TOCSAVE:
      sprintf (str,
	       "st%s\n\t"
	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
	       TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
	       tls, rel64);
      break;
    case RS6000_PLTSEQ_PLT16_HA:
      if (DEFAULT_ABI == ABI_V4 && !flag_pic)
	sprintf (str,
		 "lis %%0,0\n\t"
		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
		 tls, off, rel64);
      else
	sprintf (str,
		 "addis %%0,%%1,0\n\t"
		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
		 tls, off, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT16_LO:
      sprintf (str,
	       "l%s %%0,0(%%1)\n\t"
	       "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
	       TARGET_64BIT ? "d" : "wz",
	       tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
      break;
    case RS6000_PLTSEQ_MTCTR:
      sprintf (str,
	       "mtctr %%1\n\t"
	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
	       tls, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT_PCREL34:
      sprintf (str,
	       "pl%s %%0,0(0),1\n\t"
	       "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
	       TARGET_64BIT ? "d" : "wz",
	       tls, rel64);
      break;
    default:
      gcc_unreachable ();
    }
  return str;
}
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
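/* E.g. for a hidden function "foo" under -mcall-aixdesc this writes both
	.hidden	foo
	.hidden	.foo
   so the descriptor symbol and the code entry symbol agree.  */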
enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
	  || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}
/* Generate a compare for CODE.  Return a brand-new rtx that
   represents the result of the compare.  */

static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
{
  machine_mode comp_mode;
  rtx compare_result;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    comp_mode = CCmode;
  else if (FLOAT_MODE_P (mode))
    comp_mode = CCFPmode;
  else if (code == GTU || code == LTU
	   || code == GEU || code == LEU)
    comp_mode = CCUNSmode;
  else if ((code == EQ || code == NE)
	   && unsigned_reg_p (op0)
	   && (unsigned_reg_p (op1)
	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values, perhaps there will be a later
       ordering compare that can be shared with this one.  */
    comp_mode = CCUNSmode;
  else
    comp_mode = CCmode;

  /* If we have an unsigned compare, make sure we don't have a signed value as
     an immediate.  */
  if (comp_mode == CCUNSmode && CONST_INT_P (op1)
      && INTVAL (op1) < 0)
    {
      op0 = copy_rtx_if_shared (op0);
      op1 = force_reg (GET_MODE (op0), op1);
      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
    }

  /* First, the compare.  */
  compare_result = gen_reg_rtx (comp_mode);

  /* IEEE 128-bit support in VSX registers when we do not have hardware
     support.  */
  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    {
      rtx libfunc = NULL_RTX;
      bool check_nan = false;
      rtx dest;

      switch (code)
	{
	case EQ:
	case NE:
	  libfunc = optab_libfunc (eq_optab, mode);
	  break;

	case GT:
	case GE:
	  libfunc = optab_libfunc (ge_optab, mode);
	  break;

	case LT:
	case LE:
	  libfunc = optab_libfunc (le_optab, mode);
	  break;

	case UNORDERED:
	case ORDERED:
	  libfunc = optab_libfunc (unord_optab, mode);
	  code = (code == UNORDERED) ? NE : EQ;
	  break;

	case UNGE:
	case UNGT:
	  check_nan = true;
	  libfunc = optab_libfunc (ge_optab, mode);
	  code = (code == UNGE) ? GE : GT;
	  break;

	case UNLE:
	case UNLT:
	  check_nan = true;
	  libfunc = optab_libfunc (le_optab, mode);
	  code = (code == UNLE) ? LE : LT;
	  break;

	case UNEQ:
	case LTGT:
	  check_nan = true;
	  libfunc = optab_libfunc (eq_optab, mode);
	  code = (code == UNEQ) ? EQ : NE;
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (libfunc);

      if (!check_nan)
	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					SImode, op0, mode, op1, mode);

      /* The library signals an exception for signalling NaNs, so we need to
	 handle isgreater, etc. by first checking isordered.  */
      else
	{
	  rtx ne_rtx, normal_dest, unord_dest;
	  rtx unord_func = optab_libfunc (unord_optab, mode);
	  rtx join_label = gen_label_rtx ();
	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
	  rtx unord_cmp = gen_reg_rtx (comp_mode);

	  /* Test for either value being a NaN.  */
	  gcc_assert (unord_func);
	  unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
						SImode, op0, mode, op1, mode);

	  /* Set the result to true (1) if either value is a NaN, and jump
	     to the join label.  */
	  dest = gen_reg_rtx (SImode);
	  emit_move_insn (dest, const1_rtx);
	  emit_insn (gen_rtx_SET (unord_cmp,
				  gen_rtx_COMPARE (comp_mode, unord_dest,
						   const0_rtx)));

	  ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
	  emit_jump_insn (gen_rtx_SET (pc_rtx,
				       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
							     join_ref,
							     pc_rtx)));

	  /* Do the normal comparison, knowing that the values are not
	     NaNs.  */
	  normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
						 SImode, op0, mode, op1, mode);

	  emit_insn (gen_cstoresi4 (dest,
				    gen_rtx_fmt_ee (code, SImode, normal_dest,
						    const0_rtx),
				    normal_dest, const0_rtx));

	  /* Join NaN and non-NaN paths.  Compare dest against 0.  */
	  emit_label (join_label);
	  code = NE;
	}

      emit_insn (gen_rtx_SET (compare_result,
			      gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
    }

  else
    {
      /* Generate XLC-compatible TFmode compare as PARALLEL with extra
	 CLOBBERs to match cmptf_internal2 pattern.  */
      if (comp_mode == CCFPmode && TARGET_XL_COMPAT
	  && FLOAT128_IBM_P (GET_MODE (op0))
	  && TARGET_HARD_FLOAT)
	emit_insn (gen_rtx_PARALLEL (VOIDmode,
	  gen_rtvec (10,
		     gen_rtx_SET (compare_result,
				  gen_rtx_COMPARE (comp_mode, op0, op1)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
      else if (GET_CODE (op1) == UNSPEC
	       && XINT (op1, 1) == UNSPEC_SP_TEST)
	{
	  rtx op1b = XVECEXP (op1, 0, 0);
	  comp_mode = CCEQmode;
	  compare_result = gen_reg_rtx (CCEQmode);
	  if (TARGET_64BIT)
	    emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
	  else
	    emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
	}
      else
	emit_insn (gen_rtx_SET (compare_result,
				gen_rtx_COMPARE (comp_mode, op0, op1)));
    }

  validate_condition_mode (code, GET_MODE (compare_result));

  return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
}
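
/* Illustrative sketch (not from the original sources): on a target without
   IEEE 128-bit hardware, a source comparison such as

	_Float128 a, b;
	if (a <= b) ...

   is expanded above into roughly

	r3  = __lekf2 (a, b);	// soft-float libcall via le_optab; the
				// exact libgcc symbol name may differ
	cr0 = compare (r3, 0);	// LE is then tested on the integer result

   i.e. the libcall result, not the original FP values, is what feeds the
   condition register that later branch/set patterns consume.  */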
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
			  const_tree type1,
			  const_tree type2)
{
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
	  || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
	return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
		  "point types");
    }

  return NULL;
}
/* Expand floating point conversion to/from __float128 and __ibm128.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  size_t kf_or_tf;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;

  struct hw_conv_t {
    rtx_2func_t from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,	/* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,	/* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,	/* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,	/* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,	/* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,	/* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,	/* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,	/* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,		/* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,	/* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,		/* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,	/* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,	/* TFmode <- DFmode.  */
      gen_extendsftf2_hw,	/* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,	/* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,	/* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,	/* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,	/* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,	/* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,	/* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,		/* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,	/* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,		/* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,	/* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
	kf_or_tf = 0;
      else if (dest_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (src_mode)
	{
	case E_DFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_df;
	  break;

	case E_SFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  if (FLOAT128_IBM_P (src_mode))
	    cvt = sext_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
	kf_or_tf = 0;
      else if (src_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (dest_mode)
	{
	case E_DFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_df;
	  break;

	case E_SFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  if (FLOAT128_IBM_P (dest_mode))
	    cvt = trunc_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode/IFmode.  */
  if (do_move)
    emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
				       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
	emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();
}
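
/* Worked example (illustrative only): converting a signed DImode value to
   a KFmode destination selects kf_or_tf = 0, so the table above yields
   hw_conversions[0].from_di_sign == gen_float_kfdi2_hw.  With
   -mfloat128-hardware that emitter is used directly; otherwise
   cvt == sfloat_optab and convert_optab_libfunc picks the corresponding
   soft-float libgcc routine (__floatdikf or similar) instead.  */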
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
			    gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));

  return scratch;
}
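
/* For example (illustrative): comparing r3 against the constant 17 can use
   the XOR form, since 17 is a logical_operand:

	xori r0,r3,17		; r0 == 0 iff r3 == 17

   while a constant such as -5, which is not a logical_operand, uses the
   PLUS form, r0 = r3 + 5, which is likewise zero exactly when r3 == -5.  */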
/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
   requires this.  The result is mode MODE.  */
rtx
rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
{
  rtx cond[2];
  int n = 0;
  if (code == LTGT || code == LE || code == UNLT)
    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
  if (code == LTGT || code == GE || code == UNGT)
    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
  if (code == LE || code == GE || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
  if (code == UNLT || code == UNGT || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);

  gcc_assert (n == 2);

  rtx cc = gen_reg_rtx (CCEQmode);
  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));

  return gen_rtx_EQ (VOIDmode, cc, const0_rtx);
}
void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
  rtx_code cond_code = GET_CODE (condition_rtx);

  if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
      && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
    condition_rtx = rs6000_emit_fp_cror (cond_code, mode, condition_rtx);
  else if (cond_code == NE
	   || cond_code == GE || cond_code == LE
	   || cond_code == GEU || cond_code == LEU
	   || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
				     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (operands[0], condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (operands[0], condition_rtx));
    }
}
/* Emit a branch of kind CODE to location LOC.  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares needs care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
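
/* Sample outputs (illustrative only; register naming follows reg_names):
   a probable in-range equality branch comes out roughly as "beq+ 0,.L5",
   while an out-of-range target (need_longbranch) inverts the sense and
   covers the distance with an unconditional branch: "bne- 0,$+8\n\tb .L5".  */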
/* Return insn for VSX or Altivec comparisons.  */

static rtx
rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
{
  rtx mask;
  machine_mode mode = GET_MODE (op0);

  switch (code)
    {
    default:
      break;

    case GE:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	return NULL_RTX;
      /* FALLTHRU */

    case EQ:
    case GT:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case LTGT:
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return mask;
    }

  return NULL_RTX;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
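
/* Example of the recursion (illustrative): V4SI "a >= b" has no direct
   AltiVec/VSX compare, so the GE case above rewrites it as
   (a > b) | (a == b), i.e. a vcmpgtsw and a vcmpequw combined with vor,
   while LT simply swaps the operands and retries as GT.  */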
/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
	 specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A  = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
	{
	  /* Invert condition to avoid compound test.  */
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Mark unsigned tests with CCUNSmode.  */
      cc_mode = CCUNSmode;

      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
			  CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (dest_mode,
						cond2,
						op_true,
						op_false)));
  return 1;
}
/* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or
   minimum with "C" semantics.

   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.

   I.e. the following can be replaced all of the time
	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 <  op2) ? op1 : op2;	; generate xsmincdp
	ret = (op1 <= op2) ? op1 : op2;	; generate xsmincdp

   The following can be replaced only if -ffast-math is used:
	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 >  op2) ? op2 : op1;	; generate xsmincdp
	ret = (op1 >= op2) ? op2 : op1;	; generate xsmincdp

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if we can't generate the appropriate minimum or maximum, and
   true if we did the minimum or maximum.  */

static bool
rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  bool max_p = false;

  if (result_mode != compare_mode)
    return false;

  if (code == GE || code == GT)
    max_p = true;
  else if (code == LE || code == LT)
    max_p = false;
  else
    return false;

  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
    ;

  /* Only when NaNs and signed-zeros are not in effect, smax could be
     used for `op0 < op1 ? op1 : op0`, and smin could be used for
     `op0 > op1 ? op1 : op0`.  */
  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
	   && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
    max_p = !max_p;

  else
    return false;

  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
  return true;
}
/* Possibly emit a floating point conditional move by generating a compare that
   sets a mask instruction and a XXSEL select instruction.

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if the operation cannot be generated, and true if we could
   generate the instruction.  */

static bool
rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode result_mode = GET_MODE (dest);
  rtx compare_rtx;
  rtx cmove_rtx;
  rtx clobber_rtx;

  if (!can_create_pseudo_p ())
    return false;

  switch (code)
    {
    case EQ:
    case GE:
    case GT:
      break;

    case LT:
    case LE:
      code = swap_condition (code);
      std::swap (op0, op1);
      break;

    default:
      return false;
    }

  /* Generate:	[(parallel [(set (dest)
				 (if_then_else (op (cmp1) (cmp2))
					       (true)
					       (false)))
			    (clobber (scratch))])].  */

  compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
  cmove_rtx = gen_rtx_SET (dest,
			   gen_rtx_IF_THEN_ELSE (result_mode,
						 compare_rtx,
						 true_cond,
						 false_cond));

  clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, cmove_rtx, clobber_rtx)));

  return true;
}
/* Helper function to return true if the target has instructions to do a
   compare and set mask instruction that can be used with XXSEL to implement a
   conditional move.  It is also assumed that such a target also supports the
   "C" minimum and maximum instructions.  */

static bool
have_compare_and_set_mask (machine_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
      return TARGET_P9_MINMAX;

    default:
      break;
    }

  return false;
}
/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */

bool
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
	 op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return false;
  if (GET_MODE (true_cond) != result_mode)
    return false;
  if (GET_MODE (false_cond) != result_mode)
    return false;

  /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
     instructions.  */
  if (have_compare_and_set_mask (compare_mode)
      && have_compare_and_set_mask (result_mode))
    {
      if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
	return true;

      if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
	return true;
    }

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return false;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return false;
    }

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return false;

  /* Eliminate half of the comparisons by switching operands, this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero,
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return false;

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (!CONST_DOUBLE_P (op1)
	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
	  || (! rtx_equal_p (op0, true_cond)
	      && ! rtx_equal_p (op1, true_cond))))
    return false;

  /* At this point we know we can use fsel.  */

  /* Don't allow compare_mode other than SFmode or DFmode, for others there
     is no fsel instruction.  */
  if (compare_mode != SFmode && compare_mode != DFmode)
    return false;

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
	code = LE;
	temp = true_cond;
	true_cond = false_cond;
	false_cond = temp;
	break;
      case UNGE:
	code = GE;
	break;
      case UNEQ:
	code = EQ;
	break;
      default:
	break;
      }

  /* Now, reduce everything down to a GE.  */
  switch (code)
    {
    case GE:
      break;

    case LE:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_NEG (compare_mode,
					   gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (result_mode,
						gen_rtx_GE (VOIDmode,
							    op0, op1),
						true_cond, false_cond)));

  return true;
}
/* Same as above, but for ints (isel).  */

bool
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return false;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      std::swap (false_cond, true_cond);
      PUT_CODE (condition_rtx, reverse_condition (cond_code));
      break;
    }

  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return true;
}
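
/* Illustrative expansion (register choices are hypothetical): for SImode,
   "r = (a < b) ? x : y" becomes roughly

	cmpw  cr0,ra,rb
	isel  rr,rx,ry,0	; CR0 bit 0 is the LT bit

   while an unsupported code such as GE is first reversed to LT with
   TRUE_COND and FALSE_COND swapped, as the switch above does.  */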
void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
    {
      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op0, op1, mode, 0);
  else
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
}
/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
}
/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  rtx (*fn) (rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_load_lockedqi;
      break;
    case E_HImode:
      fn = gen_load_lockedhi;
      break;
    case E_SImode:
      if (GET_MODE (mem) == QImode)
	fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
	fn = gen_load_lockedhi_si;
      else
	fn = gen_load_lockedsi;
      break;
    case E_DImode:
      fn = gen_load_lockeddi;
      break;
    case E_TImode:
      fn = gen_load_lockedti;
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (reg, mem));
}
/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  rtx (*fn) (rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_store_conditionalqi;
      break;
    case E_HImode:
      fn = gen_store_conditionalhi;
      break;
    case E_SImode:
      fn = gen_store_conditionalsi;
      break;
    case E_DImode:
      fn = gen_store_conditionaldi;
      break;
    case E_TImode:
      fn = gen_store_conditionalti;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit sync before stwcx. to address PPC405 Erratum.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());

  emit_insn (fn (res, mem, val));
}
/* Expand barriers before and after a load_locked/store_cond sequence.  */

static rtx
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
  rtx addr = XEXP (mem, 0);

  if (!legitimate_indirect_address_p (addr, reload_completed)
      && !legitimate_indexed_address_p (addr, reload_completed))
    {
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    }

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
      break;
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      break;
    default:
      gcc_unreachable ();
    }
  return mem;
}
static void
rs6000_post_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
      break;
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_isync ());
      break;
    default:
      gcc_unreachable ();
    }
}
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
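
/* Worked example (illustrative): a HImode atomic at address A = 0x1006
   yields align = A & -4 = 0x1004.  The shift is (A << 3) & 0x10 = 16 bits
   on little-endian; big-endian XORs with 0x10, giving 0, since the
   halfword then occupies the most-significant end of the word.  The mask
   is 0xffff << shift.  */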
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
					  gen_rtx_NOT (SImode, mask),
					  oldval)));

  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}
/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}
/* Expand an atomic compare and swap operation.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
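
/* The generated loop is the classic larx/stcx. sequence; illustratively,
   a strong SImode __atomic_compare_exchange with seq_cst ordering comes
   out roughly as

	sync
   .L1:	lwarx  r9,0,rbase
	cmpw   cr0,r9,rold
	bne-   cr0,.L2
	stwcx. rnew,0,rbase
	bne-   cr0,.L1
   .L2:	isync

   with CR0 left holding EQ on success, which the final boolval set above
   relies on.  (Register choices here are illustrative only.)  */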
/* Expand an atomic exchange operation.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    after = expand_simple_binop (mode, code, before, val,
				 after, 1, OPTAB_LIB_WIDEN);

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
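
/* E.g. (illustrative only) an SImode __atomic_fetch_add with relaxed
   ordering expands to just the loop

   .L1:	lwarx  r9,0,rbase
	add    r10,r9,rval
	stwcx. r10,0,rbase
	bne-   cr0,.L1

   BEFORE ends up in r9 and AFTER in r10; both barriers are omitted for
   MEMMODEL_RELAXED.  */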
/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs (reg, mode);

  /* If we have a vector quad register for MMA, and this is a load or store,
     see if we can use vector paired load/stores.  */
  if (mode == XOmode && TARGET_MMA
      && (MEM_P (dst) || MEM_P (src)))
    {
      reg_mode = OOmode;
      nregs /= 2;
    }
  /* If we have a vector pair/quad mode, split it into two/four separate
     vectors.  */
  else if (mode == OOmode || mode == XOmode)
    reg_mode = V1TImode;
  else if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	(TARGET_HARD_FLOAT ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  /* The __vector_pair and __vector_quad modes are multi-register
     modes, so if we have to load or store the registers, we have to be
     careful to properly swap them if we're in little endian mode
     below.  This means the last register gets the first memory
     location.  We also need to be careful to use the right register
     numbers if we are splitting XO to OO.  */
  if (mode == OOmode || mode == XOmode)
    {
      nregs = hard_regno_nregs (reg, mode);
      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
      if (MEM_P (dst))
	{
	  unsigned offset = 0;
	  unsigned size = GET_MODE_SIZE (reg_mode);

	  /* If we are reading an accumulator register, we have to
	     deprime it before we can access it.  */
	  if (TARGET_MMA
	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	    emit_insn (gen_mma_xxmfacc (src, src));

	  for (int i = 0; i < nregs; i += reg_mode_nregs)
	    {
	      unsigned subreg =
		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
	      rtx dst2 = adjust_address (dst, reg_mode, offset);
	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
	      offset += size;
	      emit_insn (gen_rtx_SET (dst2, src2));
	    }

	  return;
	}

      if (MEM_P (src))
	{
	  unsigned offset = 0;
	  unsigned size = GET_MODE_SIZE (reg_mode);

	  for (int i = 0; i < nregs; i += reg_mode_nregs)
	    {
	      unsigned subreg =
		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
	      rtx src2 = adjust_address (src, reg_mode, offset);
	      offset += size;
	      emit_insn (gen_rtx_SET (dst2, src2));
	    }

	  /* If we are writing an accumulator register, we have to
	     prime it after we've written it.  */
	  if (TARGET_MMA
	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	    emit_insn (gen_mma_xxmtacc (dst, dst));

	  return;
	}

      if (GET_CODE (src) == UNSPEC)
	{
	  gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
	  gcc_assert (REG_P (dst));
	  if (GET_MODE (src) == XOmode)
	    gcc_assert (FP_REGNO_P (REGNO (dst)));
	  if (GET_MODE (src) == OOmode)
	    gcc_assert (VSX_REGNO_P (REGNO (dst)));

	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
	  for (int i = 0; i < XVECLEN (src, 0); i++)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
	    }

	  /* We are writing an accumulator register, so we have to
	     prime it after we've written it.  */
	  if (GET_MODE (src) == XOmode)
	    emit_insn (gen_mma_xxmtacc (dst, dst));

	  return;
	}

      /* Register -> register moves can use common code.  */
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      /* XO/OO are opaque so cannot use subregs.  */
      if (mode == OOmode || mode == XOmode)
	{
	  for (i = nregs - 1; i >= 0; i--)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	}
      else
	{
	  for (i = nregs - 1; i >= 0; i--)
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 i * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 i * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));
    }
  else
    {
      int i;
      int j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
				: gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
	}

      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA && REG_P (src)
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  j++;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  /* XO/OO are opaque so cannot use subregs.  */
	  if (mode == OOmode || mode == XOmode)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	  else
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 j * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 j * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA && REG_P (dst)
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));

      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
static GTY(()) alias_set_type TOC_alias_set = -1;

alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}
/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static scalar_int_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}
/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	contains:
				made by	addrs?	fp?	sum?

   AIX TOC		2	crt0	as	Y	option	option
   AIX minimal TOC	30	prolog	gcc	Y	Y	option
   SVR4 SDATA		13	crt0	gcc	N	Y	N
   SVR4 pic		30	prolog	ld	Y	not yet	N
   SVR4 PIC		30	prolog	gcc	Y	option	option
   EABI TOC		30	prolog	gcc	Y	option	option

*/
/* Hash functions for the hash table.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      fidx = 3;
      break;

    default:
      break;
    }

  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
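
/* A worked illustration of the hashing scheme above (the numbers are
   hypothetical, not the real rtx_code/machine_mode values): if CODE
   were 30 and MODE were 10, the seed would be
   (30 << 3) ^ 10 == 240 ^ 10 == 250, and a single integer operand I
   would then fold in as 250 * 613 + I.  The multipliers 613 and 1231
   are simply primes chosen to spread the bits; nothing here depends
   on their exact values.  */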
hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}

/* Compare H1 and H2 for equivalence.  */

bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
  || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
  || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
  || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
  || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
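
/* Illustration (hypothetical input): for NAME "foo$bar$baz" the code
   above copies the string and rewrites it in place to "foo_bar_baz".
   A NAME with no '$', or one that starts with '$', is returned
   unchanged.  */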
void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    RS6000_OUTPUT_BASENAME (file, name);
  else
    assemble_name (file, name);
}
/* Output a TOC entry.  We derive the entry name from what is being
   written.  */

void
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
  char buf[256];
  const char *name = buf;
  rtx base = x;
  HOST_WIDE_INT offset = 0;

  gcc_assert (!TARGET_NO_TOC_OR_PCREL);

  /* When the linker won't eliminate them, don't output duplicate
     TOC entries (this happens on AIX if there is any kind of TOC,
     and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
     labels.  */
  if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
    {
      struct toc_hash_struct *h;

      /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
	 time because GGC is not initialized at that point.  */
      if (toc_hash_table == NULL)
	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);

      h = ggc_alloc<toc_hash_struct> ();
      h->key = x;
      h->key_mode = mode;
      h->labelno = labelno;

      toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
      if (*found == NULL)
	*found = h;
      else  /* This is indeed a duplicate.
	       Set this label equal to that label.  */
	{
	  fputs ("\t.set ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d,", labelno);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d\n", ((*found)->labelno));

#ifdef HAVE_AS_TLS
	  if (TARGET_XCOFF && SYMBOL_REF_P (x)
	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
	    {
	      fputs ("\t.set ", file);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d,", labelno);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d\n", ((*found)->labelno));
	    }
#endif
	  return;
	}
    }

  /* If we're going to put a double constant in the TOC, make sure it's
     aligned properly when strict alignment is on.  */
  if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
      && STRICT_ALIGNMENT
      && GET_MODE_BITSIZE (mode) >= 64
      && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
    ASM_OUTPUT_ALIGN (file, 3);
  }

  (*targetm.asm_out.internal_label) (file, "LC", labelno);

  /* Handle FP constants specially.  Note that if we have a minimal
     TOC, things we put here aren't actually in the TOC, so we can allow
     FP constants.  */
  if (CONST_DOUBLE_P (x)
      && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
    {
      long k[4];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff,
		   k[2] & 0xffffffff, k[3] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
    {
      long k[2];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
    {
      long l;

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
      else
	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  if (WORDS_BIG_ENDIAN)
	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
	  else
	    fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
    }
  else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
    {
      unsigned HOST_WIDE_INT low;
      HOST_WIDE_INT high;

      low = INTVAL (x) & 0xffffffff;
      high = (HOST_WIDE_INT) INTVAL (x) >> 32;

      /* TOC entries are always Pmode-sized, so when big-endian
	 smaller integer constants in the TOC need to be padded.
	 (This is still a win over putting the constants in
	 a separate constant pool, because then we'd have
	 to have both a TOC entry _and_ the actual constant.)

	 For a 32-bit target, CONST_INT values are loaded and shifted
	 entirely within `low' and can be stored in one TOC entry.  */

      /* It would be easy to make this work, but it doesn't now.  */
      gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));

      if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
	{
	  low |= high << 32;
	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
	  high = (HOST_WIDE_INT) low >> 32;
	  low &= 0xffffffff;
	}

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
		     (long) high & 0xffffffff, (long) low & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   (long) high & 0xffffffff, (long) low & 0xffffffff);
	  return;
	}
      else
	{
	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
			 (long) high & 0xffffffff, (long) low & 0xffffffff);
	      fprintf (file, "0x%lx,0x%lx\n",
		       (long) high & 0xffffffff, (long) low & 0xffffffff);
	      return;
	    }
	  else
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
	      return;
	    }
	}
    }

  if (GET_CODE (x) == CONST)
    {
      gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (x, 0), 1)));

      base = XEXP (XEXP (x, 0), 0);
      offset = INTVAL (XEXP (XEXP (x, 0), 1));
    }

  switch (GET_CODE (base))
    {
    case SYMBOL_REF:
      name = XSTR (base, 0);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L",
				   CODE_LABEL_NUMBER (XEXP (base, 0)));
      break;

    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ELF || TARGET_MINIMAL_TOC)
    fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
  else
    {
      fputs ("\t.tc ", file);
      RS6000_OUTPUT_BASENAME (file, name);

      if (offset < 0)
	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
      else if (offset)
	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);

      /* Mark large TOC symbols on AIX with [TE] so they are mapped
	 after other TOC symbols, reducing overflow of small TOC access
	 to [TC] symbols.  */
      fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
	     ? "[TE]," : "[TC],", file);
    }

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a TOC reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the symbol and not the
     section.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
      if (offset < 0)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
      else if (offset > 0)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
    }
  else
    output_addr_const (file, x);

#ifdef HAVE_AS_TLS
  if (TARGET_XCOFF && SYMBOL_REF_P (base))
    {
      switch (SYMBOL_REF_TLS_MODEL (base))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	/* Use global-dynamic for local-dynamic.  */
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  putc ('\n', file);
	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
	  fputs ("\t.tc .", file);
	  RS6000_OUTPUT_BASENAME (file, name);
	  fputs ("[TC],", file);
	  output_addr_const (file, x);
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
#endif

  putc ('\n', file);
}
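
/* Illustration of the output above (the label number is hypothetical
   and label syntax varies by target): for the double constant 1.0 on
   a 64-bit big-endian AIX target without -mminimal-toc, this emits
   roughly

	LC..5:
		.tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas a 64-bit ELF target emits the internal label followed by a
   bare DOUBLE_INT_ASM_OP word, since no .tc name is needed there.  */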
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
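
/* Illustration (hypothetical input): for the five bytes
   'a' 'b' '"' 'c' '\n', the loop above produces

	.byte "ab""c"
	.byte 10

   printable characters accumulate inside one quoted .byte string
   (with '"' doubled), and each non-printable byte forces a switch to
   decimal output on a fresh directive.  */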
/* Generate a unique section name for FILENAME for a section type
   represented by SECTION_DESC.  Output goes into BUF.

   SECTION_DESC can be any string, as long as it is different for each
   possible section type.

   We name the section in the same manner as xlc.  The name begins with an
   underscore followed by the filename (after stripping any leading directory
   names) with the last period replaced by the string SECTION_DESC.  If
   FILENAME does not contain a period, SECTION_DESC is appended to the end of
   the name.  */

void
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
{
  const char *q, *after_last_slash, *last_period = 0;
  char *p;
  int len;

  after_last_slash = filename;
  for (q = filename; *q; q++)
    {
      if (*q == '/')
	after_last_slash = q + 1;
      else if (*q == '.')
	last_period = q;
    }

  len = strlen (after_last_slash) + strlen (section_desc) + 2;
  *buf = (char *) xmalloc (len);

  p = *buf;
  *p++ = '_';

  for (q = after_last_slash; *q; q++)
    {
      if (q == last_period)
	{
	  strcpy (p, section_desc);
	  p += strlen (section_desc);
	  break;
	}

      else if (ISALNUM (*q))
	*p++ = *q;
    }

  if (last_period == 0)
    strcpy (p, section_desc);
  else
    *p = '\0';
}
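
/* Illustration (hypothetical arguments): with FILENAME "dir/foo.c"
   and SECTION_DESC "_ro", the code above strips the directory,
   replaces the final period, and returns "_foo_ro" in *BUF; a
   FILENAME with no period, say "bar", would instead yield
   "_bar_ro".  */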
/* Emit profile function.  */

void
output_profile_hook (int labelno ATTRIBUTE_UNUSED)
{
  /* Non-standard profiling for kernels, which just saves LR then calls
     _mcount without worrying about arg saves.  The idea is to change
     the function prologue as little as possible as it isn't easy to
     account for arg save/restore code added just for _mcount.  */
  if (TARGET_PROFILE_KERNEL)
    return;

  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
#endif
      if (NO_PROFILE_COUNTERS)
	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			   LCT_NORMAL, VOIDmode);
      else
	{
	  char buf[30];
	  const char *label_name;
	  rtx fun;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);

	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			     LCT_NORMAL, VOIDmode, fun, Pmode);
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      const char *mcount_name = RS6000_MCOUNT;
      int caller_addr_regno = LR_REGNO;

      /* Be conservative and always set this, at least for now.  */
      crtl->uses_pic_offset_table = 1;

#if TARGET_MACHO
      /* For PIC code, set up a stub and collect the caller's address
	 from r0, which is where the prologue puts it.  */
      if (MACHOPIC_INDIRECT
	  && crtl->uses_pic_offset_table)
	caller_addr_regno = 0;
#endif
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
			 LCT_NORMAL, VOIDmode,
			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
    }
}
/* Write function profiler code.  */

void
output_function_profiler (FILE *file, int labelno)
{
  char buf[100];

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_V4:
      if (!TARGET_32BIT)
	{
	  warning (0, "no profiling of 64-bit code for this ABI");
	  return;
	}
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      fprintf (file, "\tmflr %s\n", reg_names[0]);
      if (NO_PROFILE_COUNTERS)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	}
      else if (TARGET_SECURE_PLT && flag_pic)
	{
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n", name);
	    }
	  else
	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\taddis %s,%s,",
		       reg_names[12], reg_names[12]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
	}
      else if (flag_pic == 1)
	{
	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
	}
      else if (flag_pic > 1)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  /* Now, we need to get the address of the label.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	      asm_fprintf (file, "\taddi %s,%s,4\n",
			   reg_names[11], reg_names[11]);
	    }
	  else
	    {
	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	    }
	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
		       reg_names[0], reg_names[11]);
	  asm_fprintf (file, "\tadd %s,%s,%s\n",
		       reg_names[0], reg_names[0], reg_names[11]);
	}
      else
	{
	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
	  assemble_name (file, buf);
	  fputs ("@ha\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
	}

      /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
      fprintf (file, "\tbl %s%s\n",
	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
      break;

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      /* Don't do anything, done in output_profile_hook ().  */
      break;
    }
}
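
/* Illustration of the ABI_V4 output above, for the final (non-PIC,
   counters-enabled) branch; the label name is hypothetical and ELF
   register names are bare numbers:

	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount

   The old link register value is saved at 4(r1) and r0 is loaded with
   the address of the per-function counter label before _mcount is
   called.  */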
/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance issuing of load and
   store instructions.  */

static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */
static int divide_cnt;
/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */
static int vec_pairing;


/* Power4 load update and store update instructions are cracked into a
   load or store and an integer insn which are executed in the same cycle.
   Branches have their own dispatch slot which does not count against the
   GCC issue rate, but it changes the program flow so there are no other
   instructions to issue in this cycle.  */
static int
rs6000_variable_issue_1 (rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    {
      cached_can_issue_more = more;
      return cached_can_issue_more;
    }

  if (insn_terminates_group_p (insn, current_group))
    {
      cached_can_issue_more = 0;
      return cached_can_issue_more;
    }

  /* If no reservation, but reach here */
  if (recog_memoized (insn) < 0)
    return more;

  if (rs6000_sched_groups)
    {
      if (is_microcoded_insn (insn))
	cached_can_issue_more = 0;
      else if (is_cracked_insn (insn))
	cached_can_issue_more = more > 2 ? more - 2 : 0;
      else
	cached_can_issue_more = more - 1;

      return cached_can_issue_more;
    }

  if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
    return 0;

  cached_can_issue_more = more - 1;
  return cached_can_issue_more;
}

static int
rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
{
  int r = rs6000_variable_issue_1 (insn, more);
  if (verbose)
    fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
  return r;
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		    unsigned int)
{
  enum attr_type attr_type;

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  switch (dep_type)
    {
    case REG_DEP_TRUE:
      {
	/* Data dependency; DEP_INSN writes a register that INSN reads
	   some cycles later.  */

	/* Separate a load from a narrower, dependent store.  */
	if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
	     || rs6000_tune == PROCESSOR_POWER10)
	    && GET_CODE (PATTERN (insn)) == SET
	    && GET_CODE (PATTERN (dep_insn)) == SET
	    && MEM_P (XEXP (PATTERN (insn), 1))
	    && MEM_P (XEXP (PATTERN (dep_insn), 0))
	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
	  return cost + 14;

	attr_type = get_attr_type (insn);

	switch (attr_type)
	  {
	  case TYPE_JMPREG:
	    /* Tell the first scheduling pass about the latency between
	       a mtctr and bctr (and mtlr and br/blr).  The first
	       scheduling pass will not know about this latency since
	       the mtctr instruction, which has the latency associated
	       to it, will be generated by reload.  */
	    return 4;
	  case TYPE_BRANCH:
	    /* Leave some extra cycles between a compare and its
	       dependent branch, to inhibit expensive mispredicts.  */
	    if ((rs6000_tune == PROCESSOR_PPC603
		 || rs6000_tune == PROCESSOR_PPC604
		 || rs6000_tune == PROCESSOR_PPC604e
		 || rs6000_tune == PROCESSOR_PPC620
		 || rs6000_tune == PROCESSOR_PPC630
		 || rs6000_tune == PROCESSOR_PPC750
		 || rs6000_tune == PROCESSOR_PPC7400
		 || rs6000_tune == PROCESSOR_PPC7450
		 || rs6000_tune == PROCESSOR_PPCE5500
		 || rs6000_tune == PROCESSOR_PPCE6500
		 || rs6000_tune == PROCESSOR_POWER4
		 || rs6000_tune == PROCESSOR_POWER5
		 || rs6000_tune == PROCESSOR_POWER7
		 || rs6000_tune == PROCESSOR_POWER8
		 || rs6000_tune == PROCESSOR_POWER9
		 || rs6000_tune == PROCESSOR_POWER10
		 || rs6000_tune == PROCESSOR_CELL)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))

	      switch (get_attr_type (dep_insn))
		{
		case TYPE_CMP:
		case TYPE_FPCOMPARE:
		case TYPE_CR_LOGICAL:
		  return cost + 2;
		case TYPE_EXTS:
		case TYPE_MUL:
		  if (get_attr_dot (dep_insn) == DOT_YES)
		    return cost + 2;
		  else
		    break;
		case TYPE_SHIFT:
		  if (get_attr_dot (dep_insn) == DOT_YES
		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
		    return cost + 2;
		  else
		    break;
		default:
		  break;
		}
	    break;

	  case TYPE_STORE:
	  case TYPE_FPSTORE:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		if (GET_CODE (PATTERN (insn)) != SET)
		  /* If this happens, we have to extend this to schedule
		     optimally.  Return default for now.  */
		  return cost;

		/* Adjust the cost for the case where the value written
		   by a fixed point operation is used as the address
		   gen value on a store.  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && ! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_LOAD:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		/* Adjust the cost for the case where the value written
		   by a fixed point instruction is used within the address
		   gen portion of a subsequent load(u)(x) */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  default:
	    break;
	  }

	/* Fall out to return default cost.  */
      }
      break;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if ((rs6000_tune == PROCESSOR_POWER6)
	  && recog_memoized (dep_insn)
	  && (INSN_CODE (dep_insn) >= 0))
	{
	  attr_type = get_attr_type (insn);

	  switch (attr_type)
	    {
	    case TYPE_FP:
	    case TYPE_FPSIMPLE:
	      if (get_attr_type (dep_insn) == TYPE_FP
		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
		return 1;
	      break;
	    default:
	      break;
	    }
	}
      /* Fall through, no cost for output dependency.  */
      /* FALLTHRU */

    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */
      return 0;

    default:
      gcc_unreachable ();
    }

  return cost;
}
/* Debug version of rs6000_adjust_cost.  */

static int
rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			  int cost, unsigned int dw)
{
  int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);

  if (ret != cost)
    {
      const char *dep;

      switch (dep_type)
	{
	default:	     dep = "unknown dependency"; break;
	case REG_DEP_TRUE:   dep = "data dependency";	 break;
	case REG_DEP_OUTPUT: dep = "output dependency";  break;
	case REG_DEP_ANTI:   dep = "anti dependency";	 break;
	}

      fprintf (stderr,
	       "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
	       "%s, insn:\n", ret, cost, dep);

      debug_rtx (insn);
    }

  return ret;
}
/* The function returns true if INSN is microcoded.
   Return false otherwise.  */

static bool
is_microcoded_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_tune == PROCESSOR_CELL)
    return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;

  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_update (insn) == UPDATE_YES
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
	  || ((type == TYPE_LOAD || type == TYPE_STORE)
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	  || type == TYPE_MFCR)
	return true;
    }

  return false;
}
/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || (type == TYPE_CR_LOGICAL
	      && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}
/* The function returns true if INSN can be issued only from
   the branch slot.  */

static bool
is_branch_slot_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups)
    {
      enum attr_type type = get_attr_type (insn);
      if (type == TYPE_BRANCH || type == TYPE_JMPREG)
	return true;
      return false;
    }

  return false;
}
/* The function returns true if OUT_INSN sets a value that is
   used in the address generation computation of IN_INSN.  */

static bool
set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;

  /* For performance reasons, only handle the simple case where
     both loads are a single_set.  */
  out_set = single_set (out_insn);
  if (out_set)
    {
      in_set = single_set (in_insn);
      if (in_set)
	return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
    }

  return false;
}
/* Try to determine base/offset/size parts of the given MEM.
   Return true if successful, false if all the values couldn't
   be determined.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address form will return false.  */

static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;
  if (MEM_SIZE_KNOWN_P (mem))
    *size = MEM_SIZE (mem);
  else
    return false;

  addr_rtx = (XEXP (mem, 0));
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  *offset = 0;
  while (GET_CODE (addr_rtx) == PLUS
	 && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *offset += INTVAL (XEXP (addr_rtx, 1));
      addr_rtx = XEXP (addr_rtx, 0);
    }
  if (!REG_P (addr_rtx))
    return false;

  *base = addr_rtx;
  return true;
}
/* The function returns true if the target storage location of
   mem1 is adjacent to the target storage location of mem2 */
/* Return 1 if memory locations are adjacent.  */

static bool
adjacent_mem_locations (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && ((off1 + size1 == off2)
		|| (off2 + size2 == off1)));

  return false;
}
/* This function returns true if it can be determined that the two MEM
   locations overlap by at least 1 byte based on base reg/offset/size.  */

static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && (((off1 <= off2) && (off1 + size1 > off2))
		|| ((off2 <= off1) && (off2 + size2 > off1))));

  return false;
}
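
/* Worked example for the two predicates above (hypothetical offsets):
   with a common base register, an 8-byte store at offset 0 and one at
   offset 8 are adjacent (0 + 8 == 8), while an 8-byte access at
   offset 0 and another at offset 4 overlap (0 <= 4 && 0 + 8 > 4).  */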
/* A C statement (sans semicolon) to update the integer scheduling
   priority INSN_PRIORITY (INSN).  Increase the priority to execute the
   INSN earlier, reduce the priority to execute INSN later.  Do not
   define this macro if you do not need to adjust the scheduling
   priorities of insns.  */

static int
rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
  rtx load_mem, str_mem;
  /* On machines (like the 750) which have asymmetric integer units,
     where one integer unit can do multiply and divides and the other
     can't, reduce the priority of multiply/divide so it is scheduled
     before other integer operations.  */

#if 0
  if (! INSN_P (insn))
    return priority;

  if (GET_CODE (PATTERN (insn)) == USE)
    return priority;

  switch (rs6000_tune) {
  case PROCESSOR_PPC750:
    switch (get_attr_type (insn))
      {
      default:
	break;

      case TYPE_MUL:
      case TYPE_DIV:
	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
		 priority, priority);
	if (priority >= 0 && priority < 0x01000000)
	  priority >>= 3;
	break;
      }
  }
#endif

  if (insn_must_be_first_in_group (insn)
      && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
    {

      /* Prioritize insns that can be dispatched only in the first
	 dispatch slot.  */
      if (rs6000_sched_restricted_insns_priority == 1)
	/* Attach highest priority to insn.  This means that in
	   haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
	   precede 'priority' (critical path) considerations.  */
	return current_sched_info->sched_max_insns_priority;
      else if (rs6000_sched_restricted_insns_priority == 2)
	/* Increase priority of insn by a minimal amount.  This means that in
	   haifa-sched.c:ready_sort(), only 'priority' (critical path)
	   considerations precede dispatch-slot restriction considerations.  */
	return (priority + 1);
    }

  if (rs6000_tune == PROCESSOR_POWER6
      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
	  || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this instruction
       is a store.  Power6 wants loads and stores scheduled alternately
       when possible */
    return current_sched_info->sched_max_insns_priority;

  return priority;
}
/* Return true if the instruction is nonpipelined on the Cell.  */
static bool
is_nonpipeline_insn (rtx_insn *insn)
{
  enum attr_type type;
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  type = get_attr_type (insn);
  if (type == TYPE_MUL
      || type == TYPE_DIV
      || type == TYPE_SDIV
      || type == TYPE_DDIV
      || type == TYPE_SSQRT
      || type == TYPE_DSQRT
      || type == TYPE_MFCR
      || type == TYPE_MFCRF
      || type == TYPE_MFJMPR)
    return true;

  return false;
}
/* Return how many instructions the machine can issue per cycle.  */

static int
rs6000_issue_rate (void)
{
  /* Unless scheduling for register pressure, use issue rate of 1 for
     first scheduling pass to decrease degradation.  */
  if (!reload_completed && !flag_sched_pressure)
    return 1;

  switch (rs6000_tune) {
  case PROCESSOR_RS64A:
  case PROCESSOR_PPC601: /* ? */
  case PROCESSOR_PPC7450:
    return 3;
  case PROCESSOR_PPC440:
  case PROCESSOR_PPC603:
  case PROCESSOR_PPC750:
  case PROCESSOR_PPC7400:
  case PROCESSOR_PPC8540:
  case PROCESSOR_PPC8548:
  case PROCESSOR_CELL:
  case PROCESSOR_PPCE300C2:
  case PROCESSOR_PPCE300C3:
  case PROCESSOR_PPCE500MC:
  case PROCESSOR_PPCE500MC64:
  case PROCESSOR_PPCE5500:
  case PROCESSOR_PPCE6500:
  case PROCESSOR_TITAN:
    return 2;
  case PROCESSOR_PPC476:
  case PROCESSOR_PPC604:
  case PROCESSOR_PPC604e:
  case PROCESSOR_PPC620:
  case PROCESSOR_PPC630:
    return 4;
  case PROCESSOR_POWER4:
  case PROCESSOR_POWER5:
  case PROCESSOR_POWER6:
  case PROCESSOR_POWER7:
    return 5;
  case PROCESSOR_POWER8:
    return 7;
  case PROCESSOR_POWER9:
  case PROCESSOR_POWER10:
    return 6;
  default:
    return 1;
  }
}
/* Return how many instructions to look ahead for better insn
   scheduling.  */

static int
rs6000_use_sched_lookahead (void)
{
  switch (rs6000_tune)
    {
    case PROCESSOR_PPC8540:
    case PROCESSOR_PPC8548:
      return 4;

    case PROCESSOR_CELL:
      return (reload_completed ? 8 : 0);

    default:
      return 0;
    }
}
/* We are choosing insn from the ready queue.  Return zero if INSN can be
   chosen.  */

static int
rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
{
  if (ready_index == 0)
    return 0;

  if (rs6000_tune != PROCESSOR_CELL)
    return 0;

  gcc_assert (insn != NULL_RTX && INSN_P (insn));

  if (!reload_completed
      || is_nonpipeline_insn (insn)
      || is_microcoded_insn (insn))
    return 1;

  return 0;
}
/* Determine if PAT refers to memory.  If so, set MEM_REF to the MEM rtx
   and return true.  */

static bool
find_mem_ref (rtx pat, rtx *mem_ref)
{
  const char *fmt;
  int i, j;

  /* stack_tie does not produce any real memory traffic.  */
  if (tie_operand (pat, VOIDmode))
    return false;

  if (MEM_P (pat))
    {
      *mem_ref = pat;
      return true;
    }

  /* Recursively process the pattern.  */
  fmt = GET_RTX_FORMAT (GET_CODE (pat));

  for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if (find_mem_ref (XEXP (pat, i), mem_ref))
	    return true;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
	  {
	    if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
	      return true;
	  }
    }

  return false;
}
/* Determine if PAT is a PATTERN of a load insn.  */

static bool
is_load_insn1 (rtx pat, rtx *load_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    return find_mem_ref (SET_SRC (pat), load_mem);

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN loads from memory.  */

static bool
is_load_insn (rtx insn, rtx *load_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  if (CALL_P (insn))
    return false;

  return is_load_insn1 (PATTERN (insn), load_mem);
}
/* Determine if PAT is a PATTERN of a store insn.  */

static bool
is_store_insn1 (rtx pat, rtx *str_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    return find_mem_ref (SET_DEST (pat), str_mem);

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN stores to memory.  */

static bool
is_store_insn (rtx insn, rtx *str_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  return is_store_insn1 (PATTERN (insn), str_mem);
}
/* Return whether TYPE is a Power9 pairable vector instruction type.  */

static bool
is_power9_pairable_vec_type (enum attr_type type)
{
  switch (type)
    {
    case TYPE_VECSIMPLE:
    case TYPE_VECCOMPLEX:
    case TYPE_VECDIV:
    case TYPE_VECCMP:
    case TYPE_VECPERM:
    case TYPE_VECFLOAT:
    case TYPE_VECFDIV:
    case TYPE_VECDOUBLE:
      return true;
    default:
      break;
    }
  return false;
}
/* Returns whether the dependence between INSN and NEXT is considered
   costly by the given target.  */

static bool
rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
  rtx insn;
  rtx next;
  rtx load_mem, str_mem;

  /* If the flag is not enabled - no dependence is considered costly;
     allow all dependent insns in the same group.
     This is the most aggressive option.  */
  if (rs6000_sched_costly_dep == no_dep_costly)
    return false;

  /* If the flag is set to 1 - a dependence is always considered costly;
     do not allow dependent instructions in the same group.
     This is the most conservative option.  */
  if (rs6000_sched_costly_dep == all_deps_costly)
    return true;

  insn = DEP_PRO (dep);
  next = DEP_CON (dep);

  if (rs6000_sched_costly_dep == store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem))
    /* Prevent load after store in the same group.  */
    return true;

  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem)
      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap (str_mem, load_mem))
    /* Prevent load after store in the same group if it is a true
       dependence.  */
    return true;

  /* The flag is set to X; dependences with latency >= X are considered costly,
     and will not be scheduled in the same group.  */
  if (rs6000_sched_costly_dep <= max_dep_latency
      && ((cost - distance) >= (int) rs6000_sched_costly_dep))
    return true;

  return false;
}
/* Return the next insn after INSN that is found before TAIL is reached,
   skipping any "non-active" insns - insns that will not actually occupy
   an issue slot.  Return NULL_RTX if such an insn is not found.  */

static rtx_insn *
get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
{
  if (insn == NULL_RTX || insn == tail)
    return NULL;

  while (1)
    {
      insn = NEXT_INSN (insn);
      if (insn == NULL_RTX || insn == tail)
	return NULL;

      if (CALL_P (insn)
	  || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
	  || (NONJUMP_INSN_P (insn)
	      && GET_CODE (PATTERN (insn)) != USE
	      && GET_CODE (PATTERN (insn)) != CLOBBER
	      && INSN_CODE (insn) != CODE_FOR_stack_tie))
	break;
    }
  return insn;
}
/* Move instruction at POS to the end of the READY list.  */

static void
move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp;
  int i;

  tmp = ready[pos];
  for (i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
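
/* Illustration (hypothetical ready list): with READY = {A, B, C, D},
   POS = 1 and LASTPOS = 3, the rotation above yields {A, C, D, B}.
   The "end" of the ready list is the highest index, which the
   scheduler consumes first, so B becomes the next insn issued.  */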
/* Do Power6 specific sched_reorder2 reordering of ready list.  */

static int
power6_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  int pos;
  rtx load_mem, str_mem;

  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
	 issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
	 cycle and we attempt to locate another load in the ready list to
	 issue with it.

       - If the pendulum is -2, then two stores have already been
	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
	 cycle and we attempt to locate another store in the ready list to
	 issue with it, preferring a store to an adjacent memory location to
	 facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
	   load/store instructions which make use of the LSU and which
	   would need to be accounted for to strictly model the behavior
	   of the machine.  Those instructions are currently unaccounted
	   for to help minimize compile time overhead of this code.  */

  if (is_store_insn (last_scheduled_insn, &str_mem))
    /* Issuing a store, swing the load_store_pendulum to the left */
    load_store_pendulum--;
  else if (is_load_insn (last_scheduled_insn, &load_mem))
    /* Issuing a load, swing the load_store_pendulum to the right */
    load_store_pendulum++;
  else
    return cached_can_issue_more;

  /* If the pendulum is balanced, or there is only one instruction on
     the ready list, then all is well, so return.  */
  if ((load_store_pendulum == 0) || (lastpos <= 0))
    return cached_can_issue_more;

  if (load_store_pendulum == 1)
    {
      /* A load has been issued in this cycle.  Scan the ready list
	 for another load to issue with it */
      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_load_insn (ready[pos], &load_mem))
	    {
	      /* Found a load.  Move it to the head of the ready list,
		 and adjust its priority so that it is more likely to
		 stay there */
	      move_to_end_of_ready (ready, pos, lastpos);

	      if (!sel_sched_p ()
		  && INSN_PRIORITY_KNOWN (ready[lastpos]))
		INSN_PRIORITY (ready[lastpos])++;
	      break;
	    }
	  pos--;
	}
    }
  else if (load_store_pendulum == -2)
    {
      /* Two stores have been issued in this cycle.  Increase the
	 priority of the first load in the ready list to favor it for
	 issuing in the next cycle. */
      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_load_insn (ready[pos], &load_mem)
	      && !sel_sched_p ()
	      && INSN_PRIORITY_KNOWN (ready[pos]))
	    {
	      INSN_PRIORITY (ready[pos])++;

	      /* Adjust the pendulum to account for the fact that a load
		 was found and increased in priority.  This is to prevent
		 increasing the priority of multiple loads */
	      load_store_pendulum--;

	      break;
	    }
	  pos--;
	}
    }
  else if (load_store_pendulum == -1)
    {
      /* A store has been issued in this cycle.  Scan the ready list for
	 another store to issue with it, preferring a store to an adjacent
	 memory location */
      int first_store_pos = -1;

      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_store_insn (ready[pos], &str_mem))
	    {
	      rtx str_mem2;
	      /* Maintain the index of the first store found on the
		 list */
	      if (first_store_pos == -1)
		first_store_pos = pos;

	      if (is_store_insn (last_scheduled_insn, &str_mem2)
		  && adjacent_mem_locations (str_mem, str_mem2))
		{
		  /* Found an adjacent store.  Move it to the head of the
		     ready list, and adjust its priority so that it is
		     more likely to stay there */
		  move_to_end_of_ready (ready, pos, lastpos);

		  if (!sel_sched_p ()
		      && INSN_PRIORITY_KNOWN (ready[lastpos]))
		    INSN_PRIORITY (ready[lastpos])++;

		  first_store_pos = -1;

		  break;
		}
	    }
	  pos--;
	}

      if (first_store_pos >= 0)
	{
	  /* An adjacent store wasn't found, but a non-adjacent store was,
	     so move the non-adjacent store to the front of the ready
	     list, and adjust its priority so that it is more likely to
	     stay there.  */
	  move_to_end_of_ready (ready, first_store_pos, lastpos);
	  if (!sel_sched_p ()
	      && INSN_PRIORITY_KNOWN (ready[lastpos]))
	    INSN_PRIORITY (ready[lastpos])++;
	}
    }
  else if (load_store_pendulum == 2)
    {
      /* Two loads have been issued in this cycle.  Increase the priority
	 of the first store in the ready list to favor it for issuing in
	 the next cycle. */
      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_store_insn (ready[pos], &str_mem)
	      && !sel_sched_p ()
	      && INSN_PRIORITY_KNOWN (ready[pos]))
	    {
	      INSN_PRIORITY (ready[pos])++;

	      /* Adjust the pendulum to account for the fact that a store
		 was found and increased in priority.  This is to prevent
		 increasing the priority of multiple stores */
	      load_store_pendulum++;

	      break;
	    }
	  pos--;
	}
    }

  return cached_can_issue_more;
}
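
/* A short walk-through of the pendulum (hypothetical issue sequence):
   starting at 0, issuing a load moves it to 1, so the code above then
   tries to pull a second load to the end of the ready list; a second
   load moves it to 2, after which the first store in the ready list
   gets a priority boost and the pendulum is pushed past 2 so that
   only one store is boosted per cycle.  Stores mirror this with
   negative values.  */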
/* Do Power9 specific sched_reorder2 reordering of ready list.  */

static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  int pos;
  enum attr_type type, type2;

  type = get_attr_type (last_scheduled_insn);

  /* Try to issue fixed point divides back-to-back in pairs so they will be
     routed to separate execution units and execute in parallel.  */
  if (type == TYPE_DIV && divide_cnt == 0)
    {
      /* First divide has been scheduled.  */
      divide_cnt = 1;

      /* Scan the ready list looking for another divide, if found move it
	 to the end of the list so it is chosen next.  */
      pos = lastpos;
      while (pos >= 0)
	{
	  if (recog_memoized (ready[pos]) >= 0
	      && get_attr_type (ready[pos]) == TYPE_DIV)
	    {
	      move_to_end_of_ready (ready, pos, lastpos);
	      break;
	    }
	  pos--;
	}
    }
  else
    {
      /* Last insn was the 2nd divide or not a divide, reset the counter.  */
      divide_cnt = 0;

      /* The best dispatch throughput for vector and vector load insns can be
	 achieved by interleaving a vector and vector load such that they'll
	 dispatch to the same superslice.  If this pairing cannot be achieved
	 then it is best to pair vector insns together and vector load insns
	 together.

	 To aid in this pairing, vec_pairing maintains the current state with
	 the following values:

	     0 : Initial state, no vecload/vector pairing has been started.

	     1 : A vecload or vector insn has been issued and a candidate for
		 pairing has been found and moved to the end of the ready
		 list.  */
      if (type == TYPE_VECLOAD)
	{
	  /* Issued a vecload.  */
	  if (vec_pairing == 0)
	    {
	      int vecload_pos = -1;
	      /* We issued a single vecload, look for a vector insn to pair it
		 with.  If one isn't found, try to pair another vecload.  */
	      pos = lastpos;
	      while (pos >= 0)
		{
		  if (recog_memoized (ready[pos]) >= 0)
		    {
		      type2 = get_attr_type (ready[pos]);
		      if (is_power9_pairable_vec_type (type2))
			{
			  /* Found a vector insn to pair with, move it to the
			     end of the ready list so it is scheduled next.  */
			  move_to_end_of_ready (ready, pos, lastpos);
			  vec_pairing = 1;
			  return cached_can_issue_more;
			}
		      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
			/* Remember position of first vecload seen.  */
			vecload_pos = pos;
		    }
		  pos--;
		}
	      if (vecload_pos >= 0)
		{
		  /* Didn't find a vector to pair with but did find a vecload,
		     move it to the end of the ready list.  */
		  move_to_end_of_ready (ready, vecload_pos, lastpos);
		  vec_pairing = 1;
		  return cached_can_issue_more;
		}
	    }
	}
      else if (is_power9_pairable_vec_type (type))
	{
	  /* Issued a vector operation.  */
	  if (vec_pairing == 0)
	    {
	      int vec_pos = -1;
	      /* We issued a single vector insn, look for a vecload to pair it
		 with.  If one isn't found, try to pair another vector.  */
	      pos = lastpos;
	      while (pos >= 0)
		{
		  if (recog_memoized (ready[pos]) >= 0)
		    {
		      type2 = get_attr_type (ready[pos]);
		      if (type2 == TYPE_VECLOAD)
			{
			  /* Found a vecload insn to pair with, move it to the
			     end of the ready list so it is scheduled next.  */
			  move_to_end_of_ready (ready, pos, lastpos);
			  vec_pairing = 1;
			  return cached_can_issue_more;
			}
		      else if (is_power9_pairable_vec_type (type2)
			       && vec_pos == -1)
			/* Remember position of first vector insn seen.  */
			vec_pos = pos;
		    }
		  pos--;
		}
	      if (vec_pos >= 0)
		{
		  /* Didn't find a vecload to pair with but did find a vector
		     insn, move it to the end of the ready list.  */
		  move_to_end_of_ready (ready, vec_pos, lastpos);
		  vec_pairing = 1;
		  return cached_can_issue_more;
		}
	    }
	}

      /* We've either finished a vec/vecload pair, couldn't find an insn to
	 continue the current pair, or the last insn had nothing to do with
	 pairing.  In any case, reset the state.  */
      vec_pairing = 0;
    }

  return cached_can_issue_more;
}
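
/* Illustration of the pairing state machine above (hypothetical issue
   order): after a vecload issues with vec_pairing == 0, a pairable
   vector insn found in the ready list is rotated to the end and
   vec_pairing becomes 1; once that partner (or any unrelated insn)
   has issued, the state resets to 0 and a new pair can start.  */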
/* We are about to begin issuing insns for this clock cycle.  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
		      rtx_insn **ready ATTRIBUTE_UNUSED,
		      int *pn_ready ATTRIBUTE_UNUSED,
		      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list, if the second to last ready insn
     is a nonpipelined insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
	  && (recog_memoized (ready[n_ready - 2]) > 0))
	/* Simply swap first two insns.  */
	std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  if (rs6000_tune == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  return rs6000_issue_rate ();
}
18845 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
18846 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
18849 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
18851 /* Do Power6 dependent reordering if necessary. */
18852 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
18853 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
18855 /* Do Power9 dependent reordering if necessary. */
18856 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
18857 && recog_memoized (last_scheduled_insn
) >= 0)
18858 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
18860 return cached_can_issue_more
;
/* Return whether the presence of INSN causes a dispatch group termination
   of group WHICH_GROUP.

   If WHICH_GROUP == current_group, this function will return true if INSN
   causes the termination of the current group (i.e, the dispatch group to
   which INSN belongs).  This means that INSN will be the last insn in the
   group it belongs to.

   If WHICH_GROUP == previous_group, this function will return true if INSN
   causes the termination of the previous group (i.e, the dispatch group that
   precedes the group to which INSN belongs).  This means that INSN will be
   the first insn in the group it belongs to.  */

static bool
insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
{
  bool first, last;

  if (! insn)
    return false;

  first = insn_must_be_first_in_group (insn);
  last = insn_must_be_last_in_group (insn);

  if (first && last)
    return true;

  if (which_group == current_group)
    return last;
  else if (which_group == previous_group)
    return first;

  return false;
}
static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
	return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
	return true;

      if (!rs6000_sched_groups)
	return false;

      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MFCR:
	case TYPE_SYNC:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_CR_LOGICAL:
	case TYPE_MTJMPR:
	case TYPE_MFJMPR:
	case TYPE_DIV:
	  return true;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_EXTS:
	case TYPE_CNTLZ:
	case TYPE_TRAP:
	case TYPE_MUL:
	case TYPE_INSERT:
	case TYPE_FPCOMPARE:
	case TYPE_MFCR:
	case TYPE_MTCR:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_SHIFT:
	  if (get_attr_dot (insn) == DOT_NO
	      || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	    return true;
	  break;
	case TYPE_DIV:
	  if (get_attr_size (insn) == SIZE_32)
	    return true;
	  break;
	case TYPE_LOAD:
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_DIV:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_MUL:
	case TYPE_SHIFT:
	case TYPE_EXTS:
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  break;
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_SYNC:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_VECSTORE:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_SHIFT:
	case TYPE_EXTS:
	case TYPE_MUL:
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  break;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
static bool
insn_must_be_last_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER4:
    case PROCESSOR_POWER5:
      if (is_microcoded_insn (insn))
	return true;

      if (is_branch_slot_insn (insn))
	return true;

      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_FPCOMPARE:
	  return true;
	case TYPE_SHIFT:
	  if (get_attr_dot (insn) == DOT_NO
	      || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	    return true;
	  break;
	case TYPE_DIV:
	  if (get_attr_size (insn) == SIZE_32)
	    return true;
	  break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      && get_attr_update (insn) == UPDATE_YES)
	    return true;
	  break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      && get_attr_update (insn) == UPDATE_YES)
	    return true;
	  break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  break;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      if (!insn)
	continue;

      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
	{
	  rtx next = DEP_CON (dep);

	  if (next == next_insn
	      && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
	    return true;
	}
    }

  return false;
}
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert-sched-nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).  */
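/* As a worked example (illustrative numbers only, not tied to any
   particular tuning): with issue_rate == 4, two vacant slots left, and a
   non-branch NEXT_INSN that must be kept apart, scheme (1) emits
   can_issue_more - 1 == 1 nop, since only a branch could occupy the last
   (branch) slot anyway; scheme (2) with X == 1 always emits exactly one
   nop, whether or not that is enough to close the current group.  */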
static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
		 rtx_insn *next_insn, bool *group_end, int can_issue_more,
		 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
	     *group_count, can_issue_more);

  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
	can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
	 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
	 a branch.  If next_insn is a branch, we insert 'can_issue_more'
	 nops; in this case the last nop will start a new group and the
	 branch will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
	can_issue_more--;

      /* Do we have a special group ending nop?  */
      if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
	  || rs6000_tune == PROCESSOR_POWER8)
	{
	  nop = gen_group_ending_nop ();
	  emit_insn_before (nop, next_insn);
	  can_issue_more = 0;
	}
      else
	while (can_issue_more > 0)
	  {
	    nop = gen_nop ();
	    emit_insn_before (nop, next_insn);
	    can_issue_more--;
	  }
    }

  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
	 issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
	{
	  can_issue_more = issue_rate - 1;
	  (*group_count)++;
	  end = true;
	  for (i = 0; i < issue_rate; i++)
	    group_insns[i] = 0;
	}

      while (n_nops > 0)
	{
	  nop = gen_nop ();
	  emit_insn_before (nop, next_insn);
	  if (can_issue_more == issue_rate - 1) /* new group begins */
	    end = false;
	  can_issue_more--;
	  if (can_issue_more == 0)
	    {
	      can_issue_more = issue_rate - 1;
	      (*group_count)++;
	      end = true;
	      for (i = 0; i < issue_rate; i++)
		group_insns[i] = 0;
	    }
	  n_nops--;
	}

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
	= (end
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate
	       && insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
	(*group_count)--;

      if (sched_verbose > 6)
	fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
		 *group_count, can_issue_more);

      return can_issue_more;
    }

  return can_issue_more;
}
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing
   the estimated processor grouping on the compiler (as opposed to the
   function 'pad_groups' which tries to force the scheduler's grouping on
   the processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form
   as follows: It keeps track of how many vacant issue slots are available
   after each insn.  A subsequent insn will start a new group if one of the
   following applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or fewer issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue
     slots) can't be issued in this group.
   - fewer than 'issue_rate' slots are vacant, and the next insn always
     needs to start a new group.  */
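/* For example (an illustrative trace, assuming issue_rate == 4): after
   three single-slot insns, one vacant slot remains.  A branch may still
   join this group, because the fourth slot is the branch slot; a
   non-branch insn, or a cracked insn needing two slots, instead closes
   the group and starts a new one.  */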
static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
		 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    group_insns[i] = 0;
  can_issue_more = issue_rate;
  group_end = false;
  insn = get_next_active_insn (prev_head_insn, tail);

  while (insn != NULL_RTX)
    {
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
	can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
	return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
	= (can_issue_more == 0
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate
	       && insn_terminates_group_p (next_insn, previous_group)));

      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
					next_insn, &group_end, can_issue_more,
					&group_count);

      if (group_end)
	{
	  group_count++;
	  can_issue_more = 0;
	  for (i = 0; i < issue_rate; i++)
	    group_insns[i] = 0;
	}

      if (GET_MODE (next_insn) == TImode && can_issue_more)
	PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
	PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
    }

  return group_count;
}
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
	    rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  bool group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
	break;

      if (group_end)
	{
	  /* If the scheduler had marked group termination at this location
	     (between insn and next_insn), and neither insn nor next_insn
	     will force group termination, pad the group with nops to force
	     group termination.  */
	  if (can_issue_more
	      && (rs6000_sched_insert_nops == sched_finish_pad_groups)
	      && !insn_terminates_group_p (insn, current_group)
	      && !insn_terminates_group_p (next_insn, previous_group))
	    {
	      if (!is_branch_slot_insn (next_insn))
		can_issue_more--;

	      while (can_issue_more)
		{
		  nop = gen_nop ();
		  emit_insn_before (nop, next_insn);
		  can_issue_more--;
		}
	    }

	  can_issue_more = issue_rate;
	  group_count++;
	}

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}
/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops at insn group bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
	return;

      if (rs6000_sched_insert_nops == sched_finish_none)
	return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
	n_groups = pad_groups (dump, sched_verbose,
			       current_sched_info->prev_head,
			       current_sched_info->next_tail);
      else
	n_groups = redefine_groups (dump, sched_verbose,
				    current_sched_info->prev_head,
				    current_sched_info->next_tail);

      if (sched_verbose >= 6)
	{
	  fprintf (dump, "ngroups = %d\n", n_groups);
	  print_rtl (dump, current_sched_info->prev_head);
	  fprintf (dump, "Done finish_sched\n");
	}
    }
}
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;

/* Allocate store for new scheduling context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}

/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}

static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
static bool
rs6000_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_DIV:
    case TYPE_SDIV:
    case TYPE_DDIV:
    case TYPE_VECDIV:
    case TYPE_SSQRT:
    case TYPE_DSQRT:
      return false;

    default:
      return true;
    }
}
/* Length in units of the trampoline for entering a nested function.  */

static int
rs6000_trampoline_size (void)
{
  int ret = 0;

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_AIX:
      ret = (TARGET_32BIT) ? 12 : 24;
      break;

    case ABI_ELFv2:
      gcc_assert (!TARGET_32BIT);
      ret = 32;
      break;

    case ABI_DARWIN:
    case ABI_V4:
      ret = (TARGET_32BIT) ? 40 : 48;
      break;
    }

  return ret;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
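/* Under the AIX ABI the trampoline is a 3-word function descriptor, laid
   out roughly as sketched below (illustrative; regsize is 4 or 8 to match
   rs6000_trampoline_size):

       offset 0*regsize: code address copied from FNADDR's descriptor
       offset 1*regsize: TOC pointer copied from FNADDR's descriptor
       offset 2*regsize: static chain value CXT  */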
static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3 word function descriptor.  */
    case ABI_AIX:
      {
	rtx fnmem, fn_reg, toc_reg;

	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
	  error ("you cannot take the address of a nested function if you use "
		 "the %qs option", "-mno-pointers-to-nested-functions");

	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
	fn_reg = gen_reg_rtx (Pmode);
	toc_reg = gen_reg_rtx (Pmode);

	/* Macro to shorten the code expansions below.  */
#define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

	m_tramp = replace_equiv_address (m_tramp, addr);

	emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
	emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
	emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
	emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
	emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
			 LCT_NORMAL, VOIDmode,
			 addr, Pmode,
			 GEN_INT (rs6000_trampoline_size ()), SImode,
			 fnaddr, Pmode,
			 ctx_reg, Pmode);
      break;
    }
}
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}
/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

     __attribute__((altivec(vector__)))
     __attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
     __attribute__((altivec(bool__)))	(always followed by 'unsigned')

   and may appear more than once (e.g., 'vector bool char') in a
   given declaration.  */
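/* For instance, the front end expands the AltiVec keywords so that a
   declaration like

       vector unsigned int v;

   reaches this handler roughly as (illustrative expansion)

       __attribute__((altivec(vector__))) unsigned int v;

   and the handler then rewrites the type to the corresponding V4SI
   vector type.  */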
static tree
rs6000_handle_altivec_attribute (tree *node,
				 tree name ATTRIBUTE_UNUSED,
				 tree args,
				 int flags ATTRIBUTE_UNUSED,
				 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
	&& TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
	{
	  if (TARGET_64BIT)
	    error ("use of %<long%> in AltiVec types is invalid for "
		   "64-bit code without %qs", "-mvsx");
	  else if (rs6000_warn_altivec_long)
	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
		     "use %<int%>");
	}
      else if (type == long_long_unsigned_type_node
	       || type == long_long_integer_type_node)
	error ("use of %<long long%> in AltiVec types is invalid without %qs",
	       "-mvsx");
      else if (type == double_type_node)
	error ("use of %<double%> in AltiVec types is invalid without %qs",
	       "-mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
	{
	case E_TImode:
	  result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
	  break;
	case E_DImode:
	  result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
	  break;
	case E_SImode:
	  result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
	  break;
	case E_HImode:
	  result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
	  break;
	case E_QImode:
	  result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
	  break;
	case E_SFmode: result = V4SF_type_node; break;
	case E_DFmode: result = V2DF_type_node; break;
	  /* If the user says 'vector int bool', we may be handed the 'bool'
	     attribute _before_ the 'vector' attribute, and so select the
	     proper type in the 'b' case below.  */
	case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
	case E_V2DImode: case E_V2DFmode:
	  result = type;
	default: break;
	}
      break;
    case 'b':
      switch (mode)
	{
	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
	default: break;
	}
      break;
    case 'p':
      switch (mode)
	{
	case E_V8HImode: result = pixel_V8HI_type_node;
	default: break;
	}
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* AltiVec defines five built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  The 128-bit
   floating point mangling is target-specific as well.  MMA defines
   two built-in types to be used as opaque vector types.  */
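/* For example (assuming the usual Itanium C++ ABI rules for vendor
   extended types), a parameter of type 'vector __bool int' uses the
   element mangling "U6__booli" below, so a function such as

       void f (vector __bool int);

   would mangle roughly as _Z1fDv4_U6__booli.  */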
static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
      && TREE_CODE (type) != OPAQUE_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_long_type_node) return "U6__boolx";

  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
    return "g";
  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";

  if (type == vector_pair_type_node)
    return "u13__vector_pair";
  if (type == vector_quad_type_node)
    return "u13__vector_quad";

  /* For all other types, use the default mangling.  */
  return NULL;
}
/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */

static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
	  || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
					NULL_TREE,
					TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}
/* Return a reference suitable for calling a function with the
   longcall attribute.  */

static rtx
rs6000_longcall_ref (rtx call_ref, rtx arg)
{
  /* System V adds '.' to the internal name, so skip them.  */
  const char *call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
	call_name++;

      tree node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  if (TARGET_PLTSEQ)
    {
      rtx base = const0_rtx;
      int regno = 12;
      if (rs6000_pcrel_p ())
	{
	  rtx reg = gen_rtx_REG (Pmode, regno);
	  rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
					   gen_rtvec (3, base, call_ref, arg),
					   UNSPECV_PLT_PCREL);
	  emit_insn (gen_rtx_SET (reg, u));
	  return reg;
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	base = gen_rtx_REG (Pmode, TOC_REGISTER);
      else
	{
	  if (flag_pic)
	    base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  regno = 11;
	}
      /* Reg must match that used by linker PLT stubs.  For ELFv2, r12
	 may be used by a function global entry point.  For SysV4, r11
	 is used by __glink_PLTresolve lazy resolver entry.  */
      rtx reg = gen_rtx_REG (Pmode, regno);
      rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
			       UNSPEC_PLT16_HA);
      rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
					gen_rtvec (3, reg, call_ref, arg),
					UNSPECV_PLT16_LO);
      emit_insn (gen_rtx_SET (reg, hi));
      emit_insn (gen_rtx_SET (reg, lo));
      return reg;
    }

  return force_reg (Pmode, call_ref);
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_struct_attribute (tree *node, tree name,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
rs6000_ms_bitfield_layout_p (const_tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
#ifdef USING_ELFOS_H

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      if (!toc_initialized)
	{
	  fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
	  fprintf (asm_out_file, "\t.tc ");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, "\n");

	  fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
      else
	fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  toc_initialized = 1;
	}
    }
  else
    {
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
    }
}
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_elf_asm_init_sections (void)
{
  toc_section
    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);

  sdata2_section
    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
			   SDATA2_SECTION_ASM_OP);
}

/* Implement TARGET_SELECT_RTX_SECTION.  */

static section *
rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
			       unsigned HOST_WIDE_INT align)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */
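/* E.g., under this scheme a reference to 'foo' is encoded as '.foo', the
   code entry symbol, so calls bind directly to the code rather than to
   the 'foo' function descriptor.  */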
static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}
static bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}

static bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
	  || compare_section_name (section, ".sdata2")
	  || compare_section_name (section, ".gnu.linkonce.s")
	  || compare_section_name (section, ".sbss")
	  || compare_section_name (section, ".sbss2")
	  || compare_section_name (section, ".gnu.linkonce.sb")
	  || strcmp (section, ".PPC.EMB.sdata0") == 0
	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
	return true;
    }
  else
    {
      /* If we are told not to put readonly data in sdata, then don't.  */
      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
	  && !rs6000_readonly_in_sdata)
	return false;

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it there,
	     there's no need to put it in the small data section.  */
	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
	return true;
    }

  return false;
}

#endif /* USING_ELFOS_H */
/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbols refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}

/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */
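/* For example (illustrative RTL): for ADDR == (plus (reg 9)
   (const_int 8)) this returns (reg 9), which the caller can then bump
   with "la 9,8(9)".  */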
rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (REG_P (XEXP (addr, 0))
	  && REGNO (XEXP (addr, 0)) != 0)
	addr = XEXP (addr, 0);
      else if (REG_P (XEXP (addr, 1))
	       && REGNO (XEXP (addr, 1)) != 0)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
  return addr;
}

void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}
#if TARGET_MACHO

vec<branch_island, va_gc> *branch_islands;

/* Remember to generate a branch island for far calls to the given
   function.  */

static void
add_compiler_branch_island (tree label_name, tree function_name,
			    int line_number)
{
  branch_island bi = {function_name, label_name, line_number};
  vec_safe_push (branch_islands, bi);
}

/* NO_PREVIOUS_DEF checks in the linked list whether the function name is
   already there or not.  */

static bool
no_previous_def (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return false;
  return true;
}

/* GET_PREV_LABEL gets the label name from the previous definition of
   the function.  */

static tree
get_prev_label (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return bi->label_name;
  return NULL_TREE;
}
/* Generate external symbol indirection stubs (PIC and non-PIC).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  static unsigned label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);

  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (MACHOPIC_PURE)
    {
      switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, 16);
      sprintf (local_label_0, "L%u$spb", label);

      fprintf (file, "\tmflr r0\n");
      fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
      fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else /* mdynamic-no-pic or mkernel.  */
    {
      switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
	   (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if nonzero,
   otherwise we allocate register(s) as necessary.  */

#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
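/* SMALL_INT tests whether a CONST_INT fits in a signed 16-bit immediate:
   e.g. 0x7fff and -0x8000 satisfy it, while 0x8000 does not, since
   (0x8000 + 0x8000) == 0x10000.  */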
rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
					rtx reg)
{
  rtx base, offset;

  if (reg == NULL && !reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
	 it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
						     Pmode, reg_temp);
      offset =
	rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
						Pmode, reg_temp);

      if (CONST_INT_P (offset))
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (!reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    {
	      rtx mem = force_const_mem (Pmode, orig);
	      return machopic_legitimize_pic_address (mem, Pmode, reg);
	    }
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}
/* Output a .machine directive for the Darwin assembler, and call
   the generic start_file routine.  */

static void
rs6000_darwin_file_start (void)
{
  static const struct
  {
    const char *arg;
    const char *name;
    HOST_WIDE_INT if_set;
  } mapping[] = {
    { "ppc64", "ppc64", MASK_64BIT },
    { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
    { "power4", "ppc970", 0 },
    { "G5", "ppc970", 0 },
    { "7450", "ppc7450", 0 },
    { "7400", "ppc7400", MASK_ALTIVEC },
    { "G4", "ppc7400", 0 },
    { "750", "ppc750", 0 },
    { "740", "ppc750", 0 },
    { "G3", "ppc750", 0 },
    { "604e", "ppc604e", 0 },
    { "604", "ppc604", 0 },
    { "603e", "ppc603", 0 },
    { "603", "ppc603", 0 },
    { "601", "ppc601", 0 },
    { NULL, "ppc", 0 } };
  const char *cpu_id = "";
  size_t i;

  rs6000_file_start ();
  darwin_file_start ();

  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */

  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
    cpu_id = rs6000_default_cpu;

  if (global_options_set.x_rs6000_cpu_index)
    cpu_id = processor_target_table[rs6000_cpu_index].name;

  /* Look through the mapping array.  Pick the first name that either
     matches the argument, has a bit set in IF_SET that is also set
     in the target flags, or has a NULL name.  */

  i = 0;
  while (mapping[i].arg != NULL
	 && strcmp (mapping[i].arg, cpu_id) != 0
	 && (mapping[i].if_set & rs6000_isa_flags) == 0)
    i++;

  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
}

#endif /* TARGET_MACHO */
#if TARGET_ELF
static int
rs6000_elf_reloc_rw_mask (void)
{
  if (flag_pic)
    return 3;
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    return 2;
  else
    return 0;
}

/* Record an element in the table of global constructors.  SYMBOL is
   a SYMBOL_REF of the function to be called; PRIORITY is a number
   between 0 and MAX_INIT_PRIORITY.

   This differs from default_named_section_asm_out_constructor in
   that we have special handling for -mrelocatable.  */

static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_constructor (rtx symbol, int priority)
{
  const char *section = ".ctors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".ctors.%.5u",
	       /* Invert the numbering so the linker puts us in the proper
		  order; constructors are run from right to left, and the
		  linker sorts in increasing order.  */
	       MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}

static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_destructor (rtx symbol, int priority)
{
  const char *section = ".dtors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".dtors.%.5u",
	       /* Invert the numbering so the linker puts us in the proper
		  order; constructors are run from right to left, and the
		  linker sorts in increasing order.  */
	       MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
    {
      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
      ASM_OUTPUT_LABEL (file, name);
      fputs (DOUBLE_INT_ASM_OP, file);
      rs6000_output_function_entry (file, name);
      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
      if (DOT_SYMBOLS)
	{
	  fputs ("\t.size\t", file);
	  assemble_name (file, name);
	  fputs (",24\n\t.type\t.", file);
	  assemble_name (file, name);
	  fputs (",@function\n", file);
	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
	    {
	      fputs ("\t.globl\t.", file);
	      assemble_name (file, name);
	      putc ('\n', file);
	    }
	}
      else
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
      rs6000_output_function_entry (file, name);
      fputs (":\n", file);
      return;
    }

  int uses_toc;
  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1)
      && !TARGET_SECURE_PLT
      && (!constant_pool_empty_p () || crtl->profile)
      && (uses_toc = uses_TOC ()))
    {
      char buf[256];

      if (uses_toc == 2)
	switch_to_other_text_partition ();
      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.long ");
      assemble_name (file, toc_label_name);
      need_toc_init = 1;
      putc ('-', file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
      if (uses_toc == 2)
	switch_to_other_text_partition ();
    }

  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));

  if (TARGET_CMODEL == CMODEL_LARGE
      && rs6000_global_entry_point_prologue_needed_p ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.quad .TOC.-");
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  if (DEFAULT_ABI == ABI_AIX)
    {
      const char *desc_name, *orig_name;

      orig_name = (*targetm.strip_name_encoding) (name);
      desc_name = orig_name;
      while (*desc_name == '.')
	desc_name++;

      if (TREE_PUBLIC (decl))
	fprintf (file, "\t.globl %s\n", desc_name);

      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      fprintf (file, "%s:\n", desc_name);
      fprintf (file, "\t.long %s\n", orig_name);
      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
      fputs ("\t.long 0\n", file);
      fprintf (file, "\t.previous\n");
    }
  ASM_OUTPUT_LABEL (file, name);
}
static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      int fp;

      if (TARGET_HARD_FLOAT)
	fp = 1;
      else
	fp = 2;
      if (rs6000_passes_long_double)
	{
	  if (!TARGET_LONG_DOUBLE_128)
	    fp |= 2 * 4;
	  else if (TARGET_IEEEQUAD)
	    fp |= 3 * 4;
	  else
	    fp |= 1 * 4;
	}
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      if (rs6000_passes_vector)
	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
		 (TARGET_ALTIVEC_ABI ? 2 : 1));
      if (rs6000_returns_struct)
	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
		 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
	 the special symbol that LIBC uses to declare it supports the
	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
#endif
#if TARGET_XCOFF

#ifndef HAVE_XCOFF_DWARF_EXTRAS
#define HAVE_XCOFF_DWARF_EXTRAS 0
#endif

static enum unwind_info_type
rs6000_xcoff_debug_unwind_info (void)
{
  return UI_NONE;
}

static void
rs6000_xcoff_asm_output_anchor (rtx symbol)
{
  char buffer[100];

  sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
	   SYMBOL_REF_BLOCK_OFFSET (symbol));
  fprintf (asm_out_file, "%s", SET_ASM_OP);
  RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, ",");
  RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
  fprintf (asm_out_file, "\n");
}

static void
rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
{
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  putc ('\n', stream);
}
/* A get_unnamed_section callback, used for read-only sections.  PTR
   points to the section string variable.  */

static void
rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* Likewise for read-write sections.  */

static void
rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

static void
rs6000_xcoff_output_tls_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if (TARGET_MINIMAL_TOC)
    {
      /* toc_section is always selected at least once from
	 rs6000_xcoff_file_start, so this is guaranteed to
	 always be defined once and only once in each file.  */
      if (!toc_initialized)
	{
	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
	  toc_initialized = 1;
	}
      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
	       (TARGET_32BIT ? "" : ",3"));
    }
  else
    fputs ("\t.toc\n", asm_out_file);
}
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_xcoff_asm_init_sections (void)
{
  read_only_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_read_only_section_name);

  private_data_section
    = get_unnamed_section (SECTION_WRITE,
			   rs6000_xcoff_output_readwrite_section_asm_op,
			   &xcoff_private_data_section_name);

  read_only_private_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_private_rodata_section_name);

  tls_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_tls_data_section_name);

  tls_private_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_private_data_section_name);

  toc_section
    = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);

  readonly_data_section = read_only_data_section;
}
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}

static void
rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
				tree decl ATTRIBUTE_UNUSED)
{
  int smclass;
  static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };

  if (flags & SECTION_EXCLUDE)
    smclass = 4;
  else if (flags & SECTION_DEBUG)
    {
      fprintf (asm_out_file, "\t.dwsect %s\n", name);
      return;
    }
  else if (flags & SECTION_CODE)
    smclass = 0;
  else if (flags & SECTION_TLS)
    smclass = 3;
  else if (flags & SECTION_WRITE)
    smclass = 2;
  else
    smclass = 1;

  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
	   (flags & SECTION_CODE) ? "." : "",
	   name, suffix[smclass], flags & SECTION_ENTSIZE);
}

#define IN_NAMED_SECTION(DECL) \
  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
   && DECL_SECTION_NAME (DECL) != NULL)
static section *
rs6000_xcoff_select_section (tree decl, int reloc,
			     unsigned HOST_WIDE_INT align)
{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     a named section.  */
  if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
    {
      resolve_unique_section (decl, reloc, true);
      if (IN_NAMED_SECTION (decl))
	return get_named_section (decl, NULL, reloc);
    }

  if (decl_readonly_section (decl, reloc))
    {
      if (TREE_PUBLIC (decl))
	return read_only_data_section;
      else
	return read_only_private_data_section;
    }
  else
    {
      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
	{
	  if (TREE_PUBLIC (decl))
	    return tls_data_section;
	  else if (bss_initializer_p (decl))
	    {
	      /* Convert to COMMON to emit in BSS.  */
	      DECL_COMMON (decl) = 1;
	      return tls_comm_section;
	    }
	  else
	    return tls_private_data_section;
	}

      if (TREE_PUBLIC (decl))
	return data_section;
      else
	return private_data_section;
    }
}

static void
rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
  const char *name;

  /* Use select_section for private data and uninitialized data with
     alignment <= BIGGEST_ALIGNMENT.  */
  if (!TREE_PUBLIC (decl)
      || DECL_COMMON (decl)
      || (DECL_INITIAL (decl) == NULL_TREE
	  && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
      || DECL_INITIAL (decl) == error_mark_node
      || (flag_zero_initialized_in_bss
	  && initializer_zerop (DECL_INITIAL (decl))))
    return;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = (*targetm.strip_name_encoding) (name);
  set_decl_section_name (decl, name);
}
/* Select section for constant in constant pool.

   On RS/6000, all constants are in the private read-only data area.
   However, if this is being placed in the TOC it must be output as a
   toc entry.  */

static section *
rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
				 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return read_only_private_data_section;
}

/* Remove any trailing [DS] or the like from the symbol name.  */
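/* E.g. "foo[DS]" and "foo[RW]" both become "foo"; the mapping-class
   suffix is assumed to be exactly four characters long, which is why
   len - 4 is used below.  */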
static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  /* Align to at least UNIT size.  */
  if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_private_rodata_section_name,
			   main_input_filename, ".rop_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");
  rs6000_gen_section_name (&xcoff_tbss_section_name,
			   main_input_filename, ".tbss_[UL]");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
	 asm_out_file);
}
struct declare_alias_data
{
  FILE *file;
  bool function_descriptor;
};

/* Declare alias N.  A helper function for for_node_and_aliases.  */

static bool
rs6000_declare_alias (struct symtab_node *n, void *d)
{
  struct declare_alias_data *data = (struct declare_alias_data *)d;
  /* Main symbol is output specially, because varasm machinery does part of
     the job for us - we do not need to declare .globl/lglobs and such.  */
  if (!n->alias || n->weakref)
    return false;

  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
    return false;

  /* Prevent assemble_alias from trying to use .set pseudo operation
     that does not behave as expected by the middle-end.  */
  TREE_ASM_WRITTEN (n->decl) = true;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
  char *buffer = (char *) alloca (strlen (name) + 2);
  char *p;
  int dollar_inside = 0;

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p)
    {
      *p = '_';
      dollar_inside++;
      p = strchr (p + 1, '$');
    }
  if (TREE_PUBLIC (n->decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
	{
	  if (dollar_inside)
	    {
	      if (data->function_descriptor)
		fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  if (data->function_descriptor)
	    {
	      fputs ("\t.globl .", data->file);
	      RS6000_OUTPUT_BASENAME (data->file, buffer);
	      putc ('\n', data->file);
	    }
	  fputs ("\t.globl ", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
#ifdef ASM_WEAKEN_DECL
      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
#endif
    }
  else
    {
      if (dollar_inside)
	{
	  if (data->function_descriptor)
	    fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      if (data->function_descriptor)
	{
	  fputs ("\t.lglobl .", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
      fputs ("\t.lglobl ", data->file);
      RS6000_OUTPUT_BASENAME (data->file, buffer);
      putc ('\n', data->file);
    }
  if (data->function_descriptor)
    fputs (".", data->file);
  RS6000_OUTPUT_BASENAME (data->file, buffer);
  fputs (":\n", data->file);
  return false;
}
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
/* This macro produces the initial definition of a function name.
   On the RS/6000, we need to place an extra '.' in the function name and
   output the function descriptor.
   Dollar signs are converted to underscores.

   The csect for the function will have already been created when
   text_section was selected.  We do have to go back to that csect, however.

   The third and fourth parameters to the .function pseudo-op (16 and 044)
   are placeholders which no longer have any use.

   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */
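/* For a public function 'foo', the emitted assembly looks roughly like
   this (illustrative 32-bit output):

       .globl .foo
       .csect foo[DS]
   foo:
       .long .foo, TOC[tc0], 0
       .csect .text[PR]
   .foo:

   where 'foo' is the function descriptor and '.foo' the code entry.  */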
21180 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21182 char *buffer
= (char *) alloca (strlen (name
) + 1);
21184 int dollar_inside
= 0;
21185 struct declare_alias_data data
= {file
, false};
21187 strcpy (buffer
, name
);
21188 p
= strchr (buffer
, '$');
21192 p
= strchr (p
+ 1, '$');
21194 if (TREE_PUBLIC (decl
))
21196 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21198 if (dollar_inside
) {
21199 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21200 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21202 fputs ("\t.globl .", file
);
21203 RS6000_OUTPUT_BASENAME (file
, buffer
);
21204 #ifdef HAVE_GAS_HIDDEN
21205 fputs (rs6000_xcoff_visibility (decl
), file
);
21212 if (dollar_inside
) {
21213 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21214 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21216 fputs ("\t.lglobl .", file
);
21217 RS6000_OUTPUT_BASENAME (file
, buffer
);
21220 fputs ("\t.csect ", file
);
21221 RS6000_OUTPUT_BASENAME (file
, buffer
);
21222 fputs (TARGET_32BIT
? "[DS]\n" : "[DS],3\n", file
);
21223 RS6000_OUTPUT_BASENAME (file
, buffer
);
21224 fputs (":\n", file
);
21225 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21227 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21228 RS6000_OUTPUT_BASENAME (file
, buffer
);
21229 fputs (", TOC[tc0], 0\n", file
);
21231 switch_to_section (function_section (decl
));
21233 RS6000_OUTPUT_BASENAME (file
, buffer
);
21234 fputs (":\n", file
);
21235 data
.function_descriptor
= true;
21236 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21238 if (!DECL_IGNORED_P (decl
))
21240 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
21241 xcoffout_declare_function (file
, decl
, buffer
);
21242 else if (write_symbols
== DWARF2_DEBUG
)
21244 name
= (*targetm
.strip_name_encoding
) (name
);
21245 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
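
/* A minimal sketch of the directive shape the function above emits for a
   public, non-weak 32-bit function whose NAME contains '$' (illustrative
   only; the helper name below is made up and the weak, visibility and
   lglobl paths are omitted).  BUFFER is NAME with '$' rewritten to '_',
   and .rename maps the assembler-safe label back to the real name.  */
#if 0
static void
xcoff_function_shape_sketch (FILE *file, const char *name, const char *buffer)
{
  fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name); /* code label */
  fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);   /* descriptor */
  fprintf (file, "\t.globl .%s\n", buffer);
  fprintf (file, "\t.csect %s[DS]\n%s:\n", buffer, buffer); /* descriptor */
  fprintf (file, "\t.long .%s, TOC[tc0], 0\n", buffer);
  fprintf (file, ".%s:\n", buffer);                        /* code entry */
}
#endif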
/* Output assembly language to globalize a symbol from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
/* Output assembly language to define a symbol as COMMON from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
					     tree decl ATTRIBUTE_UNUSED,
					     const char *name,
					     unsigned HOST_WIDE_INT size,
					     unsigned HOST_WIDE_INT align)
{
  unsigned HOST_WIDE_INT align2 = 2;

  if (align > 32)
    align2 = floor_log2 (align / BITS_PER_UNIT);
  else if (size > 4)
    align2 = 3;

  fputs (COMMON_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);

  fprintf (stream,
	   "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
	   size, align2);

#ifdef HAVE_GAS_HIDDEN
  if (decl != NULL)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
/* This macro produces the initial definition of an object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  RS6000_OUTPUT_BASENAME (file, name);
  fputs (":\n", file);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}
/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}
/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase.

   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-__gcc_unwind_dbase", file);
}
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (!SYMBOL_REF_P (symbol))
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  /* Append mapping class to extern decls.  */
  symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      char *newname = (char *) alloca (strlen (symname) + 5);
      strcpy (newname, symname);
      strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
			? "[DS]" : "[UA]"));
      XSTR (symbol, 0) = ggc_strdup (newname);
    }
}
#endif /* HAVE_AS_TLS */
#endif /* TARGET_XCOFF */
void
rs6000_asm_weaken_decl (FILE *stream, tree decl,
			const char *name, const char *val)
{
  fputs ("\t.weak\t", stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  if (decl && TREE_CODE (decl) == FUNCTION_DECL
      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
    {
      if (TARGET_XCOFF)
	fputs ("[DS]", stream);
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
      if (decl)
	fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      fputs ("\n\t.weak\t.", stream);
      RS6000_OUTPUT_BASENAME (stream, name);
    }
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
  if (decl)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  fputc ('\n', stream);

  if (val)
    {
#ifdef ASM_OUTPUT_DEF
      ASM_OUTPUT_DEF (stream, name, val);
#endif
      if (decl && TREE_CODE (decl) == FUNCTION_DECL
	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
	{
	  fputs ("\t.set\t.", stream);
	  RS6000_OUTPUT_BASENAME (stream, name);
	  fputs (",.", stream);
	  RS6000_OUTPUT_BASENAME (stream, val);
	  fputc ('\n', stream);
	}
    }
}
/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
	 && get_attr_cannot_copy (insn);
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
      /* On the RS/6000, if it is valid in the insn, it is free.  */
    case CONST_INT:
      if (((outer_code == SET
	    || outer_code == PLUS
	    || outer_code == MINUS)
	   && (satisfies_constraint_I (x)
	       || satisfies_constraint_L (x)))
	  || (outer_code == AND
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || ((outer_code == IOR || outer_code == XOR)
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || outer_code == ASHIFT
	  || outer_code == ASHIFTRT
	  || outer_code == LSHIFTRT
	  || outer_code == ROTATE
	  || outer_code == ROTATERT
	  || outer_code == ZERO_EXTRACT
	  || (outer_code == MULT
	      && satisfies_constraint_I (x))
	  || ((outer_code == DIV || outer_code == UDIV
	       || outer_code == MOD || outer_code == UMOD)
	      && exact_log2 (INTVAL (x)) >= 0)
	  || (outer_code == COMPARE
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)))
	  || ((outer_code == EQ || outer_code == NE)
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || (outer_code == GTU
	      && satisfies_constraint_I (x))
	  || (outer_code == LTU
	      && satisfies_constraint_P (x)))
	{
	  *total = 0;
	  return true;
	}
      else if ((outer_code == PLUS
		&& reg_or_add_cint_operand (x, mode))
	       || (outer_code == MINUS
		   && reg_or_sub_cint_operand (x, mode))
	       || ((outer_code == SET
		    || outer_code == IOR
		    || outer_code == XOR)
		   && (INTVAL (x)
		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST:
    case HIGH:
    case SYMBOL_REF:
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      return true;

    case MEM:
      /* When optimizing for size, MEM should be slightly more expensive
	 than generating address, e.g., (plus (reg) (const)).
	 L1 cache latency is about two instructions.  */
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
	*total += COSTS_N_INSNS (100);
      return true;

    case LABEL_REF:
      *total = 0;
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->fp;
      else
	*total = COSTS_N_INSNS (1);
      break;

    case MULT:
      if (CONST_INT_P (XEXP (x, 1))
	  && satisfies_constraint_I (XEXP (x, 1)))
	{
	  if (INTVAL (XEXP (x, 1)) >= -256
	      && INTVAL (XEXP (x, 1)) <= 255)
	    *total = rs6000_cost->mulsi_const9;
	  else
	    *total = rs6000_cost->mulsi_const;
	}
      else if (mode == SFmode)
	*total = rs6000_cost->fp;
      else if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->dmul;
      else if (mode == DImode)
	*total = rs6000_cost->muldi;
      else
	*total = rs6000_cost->mulsi;
      break;

    case FMA:
      if (mode == SFmode)
	*total = rs6000_cost->fp;
      else
	*total = rs6000_cost->dmul;
      break;

    case DIV:
    case MOD:
      if (FLOAT_MODE_P (mode))
	{
	  *total = mode == DFmode ? rs6000_cost->ddiv
				  : rs6000_cost->sdiv;
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case UMOD:
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
	{
	  if (code == DIV || code == MOD)
	    /* Shift, addze */
	    *total = COSTS_N_INSNS (2);
	  else
	    /* Shift */
	    *total = COSTS_N_INSNS (1);
	}
      else
	{
	  if (GET_MODE (XEXP (x, 1)) == DImode)
	    *total = rs6000_cost->divdi;
	  else
	    *total = rs6000_cost->divsi;
	}
      /* Add in shift and subtract for MOD unless we have a mod instruction.  */
      if (!TARGET_MODULO && (code == MOD || code == UMOD))
	*total += COSTS_N_INSNS (2);
      return false;

    case CTZ:
      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
      return false;

    case FFS:
      *total = COSTS_N_INSNS (4);
      return false;

    case POPCOUNT:
      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
      return false;

    case PARITY:
      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
      return false;

    case NOT:
      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case AND:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  rtx left = XEXP (x, 0);
	  rtx_code left_code = GET_CODE (left);

	  /* rotate-and-mask: 1 insn.  */
	  if ((left_code == ROTATE
	       || left_code == ASHIFT
	       || left_code == LSHIFTRT)
	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
	    {
	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
	      if (!CONST_INT_P (XEXP (left, 1)))
		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
	      || (val & 0xffff) == val
	      || (val & 0xffff0000) == val
	      || ((val & 0xffff) == 0 && mode == SImode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* 2 insns.  */
	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (2);
	      return true;
	    }
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case IOR:
      *total = COSTS_N_INSNS (1);
      return true;

    case CLZ:
    case XOR:
    case ZERO_EXTRACT:
      *total = COSTS_N_INSNS (1);
      return false;

    case ASHIFT:
      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
	 the sign extend and shift separately within the insn.  */
      if (TARGET_EXTSWSLI && mode == DImode
	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
	{
	  *total = 0;
	  return false;
	}
      /* FALLTHRU */

    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      /* Handle mul_highpart.  */
      if (outer_code == TRUNCATE
	  && GET_CODE (XEXP (x, 0)) == MULT)
	{
	  if (mode == DImode)
	    *total = rs6000_cost->muldi;
	  else
	    *total = rs6000_cost->mulsi;
	  return true;
	}
      else if (outer_code == AND)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case COMPARE:
    case NEG:
    case ABS:
      if (!FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = rs6000_cost->fp;
      return false;

    case FLOAT_EXTEND:
      if (mode == DFmode)
	*total = rs6000_cost->sfdf_convert;
      else
	*total = rs6000_cost->fp;
      return false;

    case CALL:
    case IF_THEN_ELSE:
      if (!speed)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
	{
	  *total = rs6000_cost->fp;
	  return false;
	}
      break;

    case NE:
    case EQ:
    case GTU:
    case LTU:
      /* Carry bit requires mode == Pmode.
	 NEG or PLUS already counted so only add one.  */
      if (mode == Pmode
	  && (outer_code == NEG || outer_code == PLUS))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case GT:
    case LT:
    case UNORDERED:
      if (outer_code == SET)
	{
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      *total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else
	    {
	      *total = COSTS_N_INSNS (3);
	      return false;
	    }
	}
      /* CC COMPARE.  */
      if (outer_code == COMPARE)
	{
	  *total = 0;
	  return true;
	}
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
	{
	  *total = 0;
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
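
/* Worked example of the price scale used above (an illustrative sketch,
   not used by the compiler; the helper name is made up).  Everything is
   expressed in COSTS_N_INSNS units, i.e. multiples of one fast insn.  */
#if 0
static int
mem_cost_model (bool speed, bool slow_unaligned)
{
  /* At -Os a load costs one insn plus a small bias, nudging the
     optimizers toward keeping (plus (reg) (const)) address arithmetic
     instead of a load; at -O2 it costs two insns (roughly L1 latency),
     and a slow unaligned access is penalized so heavily it is avoided
     whenever possible.  */
  int total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
  if (slow_unaligned)
    total += COSTS_N_INSNS (100);
  return total;
}
#endif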
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
			int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
	   "opno = %d, total = %d, speed = %s, x:\n",
	   ret ? "complete" : "scan inner",
	   GET_MODE_NAME (mode),
	   GET_RTX_NAME (outer_code),
	   opno,
	   *total,
	   speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}
static int
rs6000_insn_cost (rtx_insn *insn, bool speed)
{
  if (recog_memoized (insn) < 0)
    return 0;

  /* If we are optimizing for size, just use the length.  */
  if (!speed)
    return get_attr_length (insn);

  /* Use the cost if provided.  */
  int cost = get_attr_cost (insn);
  if (cost > 0)
    return cost;

  /* If the insn tells us how many insns there are, use that.  Otherwise use
     the length/4.  Adjust the insn length to remove the extra size that
     prefixed instructions take.  */
  int n = get_attr_num_insns (insn);
  if (n == 0)
    {
      int length = get_attr_length (insn);
      if (get_attr_prefixed (insn) == PREFIXED_YES)
	{
	  int adjust = 0;
	  ADJUST_INSN_LENGTH (insn, adjust);
	  length -= adjust;
	}

      n = length / 4;
    }

  enum attr_type type = get_attr_type (insn);

  switch (type)
    {
    case TYPE_LOAD:
    case TYPE_FPLOAD:
    case TYPE_VECLOAD:
      cost = COSTS_N_INSNS (n + 1);
      break;

    case TYPE_MUL:
      switch (get_attr_size (insn))
	{
	case SIZE_8:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
	  break;
	case SIZE_16:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
	  break;
	case SIZE_32:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
	  break;
	case SIZE_64:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case TYPE_DIV:
      switch (get_attr_size (insn))
	{
	case SIZE_32:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
	  break;
	case SIZE_64:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case TYPE_FP:
      cost = n * rs6000_cost->fp;
      break;
    case TYPE_DMUL:
      cost = n * rs6000_cost->dmul;
      break;
    case TYPE_SDIV:
      cost = n * rs6000_cost->sdiv;
      break;
    case TYPE_DDIV:
      cost = n * rs6000_cost->ddiv;
      break;

    case TYPE_SYNC:
    case TYPE_LOAD_L:
    case TYPE_MFCR:
    case TYPE_MFCRF:
      cost = COSTS_N_INSNS (n + 2);
      break;

    default:
      cost = COSTS_N_INSNS (n);
    }

  return cost;
}
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
			   addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
	   ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
/* A C expression returning the cost of moving data from a register of class
   CLASS1 to one of CLASS2.  */

static int
rs6000_register_move_cost (machine_mode mode,
			   reg_class_t from, reg_class_t to)
{
  int ret;
  reg_class_t rclass;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /* If we have VSX, we can easily move between FPR or Altivec registers,
     otherwise we can only easily move within classes.
     Do this first so we give best-case answers for union classes
     containing both gprs and vsx regs.  */
  HARD_REG_SET to_vsx, from_vsx;
  to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
  from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
  if (!hard_reg_set_empty_p (to_vsx)
      && !hard_reg_set_empty_p (from_vsx)
      && (TARGET_VSX
	  || hard_reg_set_intersect_p (to_vsx, from_vsx)))
    {
      int reg = FIRST_FPR_REGNO;
      if (TARGET_VSX
	  || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
	      && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
	reg = FIRST_ALTIVEC_REGNO;
      ret = 2 * hard_regno_nregs (reg, mode);
    }

  /* Moves from/to GENERAL_REGS.  */
  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
	   || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
    {
      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	{
	  if (TARGET_DIRECT_MOVE)
	    {
	      /* Keep the cost for direct moves above that for within
		 a register class even if the actual processor cost is
		 comparable.  We do this because a direct move insn
		 can't be a nop, whereas with ideal register
		 allocation a move within the same class might turn
		 out to be a nop.  */
	      if (rs6000_tune == PROCESSOR_POWER9
		  || rs6000_tune == PROCESSOR_POWER10)
		ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
	      else
		ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
	      /* SFmode requires a conversion when moving between gprs
		 and vsx.  */
	      if (mode == SFmode)
		ret += 2;
	    }
	  else
	    ret = (rs6000_memory_move_cost (mode, rclass, false)
		   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
	}

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
	 shift.  */
      else if (rclass == CR_REGS)
	ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory.  */
      else if ((rs6000_tune == PROCESSOR_POWER6
		|| rs6000_tune == PROCESSOR_POWER7
		|| rs6000_tune == PROCESSOR_POWER8
		|| rs6000_tune == PROCESSOR_POWER9)
	       && reg_class_subset_p (rclass, SPECIAL_REGS))
	ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);

      else
	/* A move will cost one instruction per GPR moved.  */
	ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
    }

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[from],
		 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* A C expression returning the cost of moving data of MODE from a register
   to or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
			 bool in ATTRIBUTE_UNUSED)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs (0, mode);
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
	    || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs (32, mode);
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
  else
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.

   The register allocator chooses GEN_OR_VSX_REGS for the allocno
   class if GENERAL_REGS and VSX_REGS cost is lower than the memory
   cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
   move cost between GENERAL_REGS and VSX_REGS low.

   It might seem reasonable to use a union class.  After all, if usage
   of vsr is low and gpr high, it might make sense to spill gpr to vsr
   rather than memory.  However, in cases where register pressure of
   both is high, like the cactus_adm spec test, allowing
   GEN_OR_VSX_REGS as the allocno class results in bad decisions in
   the first scheduling pass.  This is partly due to an allocno of
   GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
   class, which gives too high a pressure for GENERAL_REGS and too low
   for VSX_REGS.  So, force a choice of the subclass here.

   The best class is also the union if GENERAL_REGS and VSX_REGS have
   the same cost.  In that case we do use GEN_OR_VSX_REGS as the
   allocno class, since trying to narrow down the class by regno mode
   is prone to error.  For example, SImode is allowed in VSX regs and
   in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
   it would be wrong to choose an allocno of GENERAL_REGS based on
   SImode.  */

static reg_class_t
rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
					reg_class_t allocno_class,
					reg_class_t best_class)
{
  switch (allocno_class)
    {
    case GEN_OR_VSX_REGS:
      /* best_class must be a subset of allocno_class.  */
      gcc_checking_assert (best_class == GEN_OR_VSX_REGS
			   || best_class == GEN_OR_FLOAT_REGS
			   || best_class == VSX_REGS
			   || best_class == ALTIVEC_REGS
			   || best_class == FLOAT_REGS
			   || best_class == GENERAL_REGS
			   || best_class == BASE_REGS);
      /* Use best_class but choose wider classes when copying from the
	 wider class to best_class is cheap.  This mimics IRA choice
	 of allocno class.  */
      if (best_class == BASE_REGS)
	return GENERAL_REGS;
      if (TARGET_VSX
	  && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
	return VSX_REGS;
      return best_class;

    default:
      break;
    }

  return allocno_class;
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
rs6000_builtin_reciprocal (tree fndecl)
{
  switch (DECL_MD_FUNCTION_CODE (fndecl))
    {
    case VSX_BUILTIN_XVSQRTDP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];

    case VSX_BUILTIN_XVSQRTSP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];

    default:
      return NULL_TREE;
    }
}
/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}
/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}
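
/* A worked scalar check of the identity used above (illustrative sketch,
   not part of the compiler; the helper name is made up and <math.h> is
   assumed for fma).  -fma (m1, m2, -a) computes -(m1*m2 - a) = a - m1*m2
   with a single rounding, which is exactly FNMSUB; fnma_optab's
   fma (-m1, m2, a) produces the same value except possibly for the sign
   of a zero result, which is why the expansion negates explicitly.  */
#if 0
#include <math.h>

static double
fnmsub_model (double m1, double m2, double a)
{
  return -fma (m1, m2, -a);	/* == a - m1*m2, rounded once.  */
}
#endif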
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  int i;
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
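
/* Self-contained scalar model of the expansion above (an illustrative
   sketch, not used by the compiler; the helper name is made up).  X0
   stands for the fres-style estimate of 1/D and PASSES mirrors the count
   computed above.  Each early pass squares the error term; the final
   pass folds N in so the result is N/D rather than 1/D.  */
#if 0
static double
swdiv_model (double n, double d, double x0, int passes)
{
  double x = x0;
  if (passes > 1)
    {
      double e = 1.0 - d * x0;		/* e0 = 1 - d*x0     (FNMSUB)  */
      x = x0 + e * x0;			/* x1 = x0 + e0*x0   (FMADD)   */
      for (int i = 0; i < passes - 2; i++)
	{
	  e = e * e;			/* enext = eprev*eprev         */
	  x = x + e * x;		/* xnext = xprev + enext*xprev */
	}
    }
  double u = n * x;			/* u = n*xprev                 */
  double v = n - d * u;			/* v = n - d*u       (FNMSUB)  */
  return v * x + u;			/* dst = v*xprev + u (FMADD)   */
}
#endif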
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  rtx target = emit_conditional_move (e, GT, src, zero, mode,
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }
}
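
/* Scalar model of the Goldschmidt recurrence above (illustrative sketch,
   not used by the compiler; the helper name is made up).  E stands for
   the rsqrt estimate; g tracks sqrt(src) and h tracks 1/(2*sqrt(src)),
   and each pass applies the shared correction t = 1/2 - g*h to both.
   Note the real expansion special-cases passes == 1 for rsqrt and uses
   a single correction on the sqrt path; this model loops uniformly just
   to show the recurrence.  */
#if 0
static double
swsqrt_model (double src, double e, int passes, int recip)
{
  double g = e * src;			/* g = sqrt estimate          */
  double h = e * 0.5;			/* h = 1/(2*sqrt) estimate    */
  for (int i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;		/* FNMSUB against mhalf       */
      g = g + g * t;			/* g1 = g + t*g               */
      h = h + h * t;			/* h1 = h + t*h               */
    }
  return recip ? h + h : g;		/* rsqrt = 2*h, sqrt = g      */
}
#endif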
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
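
/* Portable model of the popcntb-based fallback above (illustrative
   sketch, not used by the compiler; the helper name is made up).  The
   SWAR steps stand in for popcntb, which leaves a per-byte bit count in
   each byte; multiplying by 0x01010101 then sums all the byte counts
   into the top byte, which the final shift extracts.  */
#if 0
static unsigned int
popcount_model (unsigned int src)
{
  unsigned int bytes = src;
  bytes = bytes - ((bytes >> 1) & 0x55555555);		   /* 2-bit sums  */
  bytes = (bytes & 0x33333333) + ((bytes >> 2) & 0x33333333);
  bytes = (bytes + (bytes >> 4)) & 0x0f0f0f0f;	/* per-byte counts, as
						   popcntb produces    */
  return (bytes * 0x01010101) >> 24;		/* sum of all bytes    */
}
#endif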
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  rtx tmp1, tmp2, tmp3, tmp4;

	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
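
/* Portable model of the shift+xor fallback above (illustrative sketch,
   not used by the compiler; the helper name is made up).  Once popcntb
   has produced per-byte counts, folding the word onto itself with xor
   accumulates the parity of every byte into the low byte; the final
   'and 1' extracts the parity bit.  */
#if 0
static unsigned int
parity_model (unsigned int per_byte_counts)
{
  unsigned int t = per_byte_counts;
  t ^= t >> 16;			/* fold the high halfword onto the low  */
  t ^= t >> 8;			/* fold the remaining byte               */
  return t & 1;			/* parity of the original word           */
}
#endif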
/* Expand an Altivec constant permutation for little endian mode.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.

   There are two issues: First, the two input operands must be
   swapped so that together they form a double-wide array in LE
   order.  Second, the vperm instruction has surprising behavior
   in LE mode:  it interprets the elements of the source vectors
   in BE mode ("left to right") and interprets the elements of
   the destination vector in LE mode ("right to left").  To
   correct for this, we must subtract each element of the permute
   control vector from 31.

   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
   serve as the permute control vector.  Then, in BE mode,

     vperm 9,10,11,12

   places the desired result in vr9.  However, in LE mode the
   vector contents will be

     vr10 = 00000003 00000002 00000001 00000000
     vr11 = 00000007 00000006 00000005 00000004

   The result of the vperm using the same permute control vector is

     vr9  = 05000000 07000000 01000000 03000000

   That is, the leftmost 4 bytes of vr10 are interpreted as the
   source for the rightmost 4 bytes of vr9, and so on.

   If we change the permute control vector to

     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}

   and issue

     vperm 9,11,10,12

   we get the desired

     vr9  = 00000006 00000004 00000002 00000000.  */

static void
altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
				  const vec_perm_indices &sel)
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;

  /* Unpack and adjust the constant selector.  */
  for (i = 0; i < 16; ++i)
    {
      unsigned int elt = 31 - (sel[i] & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
			   UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
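
/* Standalone model of the selector adjustment above (illustrative
   sketch, not used by the compiler; the helper name is made up): since
   LE-mode vperm reads its inputs "left to right" but writes the result
   "right to left", each index is replaced by 31 minus it, and the two
   input vectors are passed to vperm in swapped order.  */
#if 0
static void
le_vperm_selector_model (const unsigned char sel[16], unsigned char adj[16])
{
  for (int i = 0; i < 16; i++)
    adj[i] = 31 - (sel[i] & 31);
}
#endif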
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    {
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
			       UNSPEC_VPERMR);
    }
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
	 The VNAND is preferred for future fusion opportunities.  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
	      ? gen_rtx_IOR (V16QImode, notx, notx)
	      : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
			       UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
      { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
       : CODE_FOR_altivec_vmrglb_direct),
      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
       : CODE_FOR_altivec_vmrglh_direct),
      { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
       : CODE_FOR_altivec_vmrglw_direct),
      { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
       : CODE_FOR_altivec_vmrghb_direct),
      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
       : CODE_FOR_altivec_vmrghh_direct),
      { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
       : CODE_FOR_altivec_vmrghw_direct),
      { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
    { OPTION_MASK_P8_VECTOR,
      (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
       : CODE_FOR_p8_vmrgow_v4sf_direct),
      { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
    { OPTION_MASK_P8_VECTOR,
      (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
       : CODE_FOR_p8_vmrgew_v4sf_direct),
      { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
  };

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V4SImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V8HImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     in memory.  */
	  if (swapped ^ !BYTES_BIG_ENDIAN)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
      return true;
    }

  return false;
}
/* Expand a VSX Permute Doubleword constant permutation.
   Return true if we match an efficient implementation.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  else if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success! */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
				 rtx op1, const vec_perm_indices &sel)
{
  bool testing_p = !target;

  /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
  if (TARGET_ALTIVEC && testing_p)
    return true;

  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
	op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);

  /* Check for ps_merge* or xxpermdi insns.  */
  if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
    {
      if (testing_p)
	{
	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
	}
      if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
	return true;
    }

  if (TARGET_ALTIVEC)
    {
      /* Force the target-independent code to lower to V16QImode.  */
      if (vmode != V16QImode)
	return false;
      if (altivec_expand_vec_perm_const (target, op0, op1, sel))
	return true;
    }

  return false;
}
/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   PERM specifies the constant permutation vector.  */

static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
			   machine_mode vmode, const vec_perm_builder &perm)
{
  rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
  if (x != target)
    emit_move_insn (target, x);
}
/* Expand an extract even operation.  */

void
rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  for (i = 0; i < nelt; i++)
    perm.quick_push (i * 2);

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Expand a vector interleave operation.  */

void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  high = (highp ? 0 : nelt / 2);
  for (i = 0; i < nelt / 2; i++)
    {
      perm.quick_push (i + high);
      perm.quick_push (i + nelt + high);
    }

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
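
/* Worked example of the index vectors built above (illustrative sketch,
   not used by the compiler; the helper name is made up).  For nelt == 4,
   extract-even pushes {0, 2, 4, 6} (the even elements of the op0:op1
   concatenation), interleave-high pushes {0, 4, 1, 5} and interleave-low
   pushes {2, 6, 3, 7}.  */
#if 0
static void
interleave_indices_model (unsigned nelt, int highp, unsigned *out)
{
  unsigned base = highp ? 0 : nelt / 2;
  for (unsigned i = 0; i < nelt / 2; i++)
    {
      *out++ = i + base;		/* element of op0 */
      *out++ = i + nelt + base;		/* element of op1 */
    }
}
#endif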
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}
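
/* Scalar model of the scaling above (illustrative sketch, not used by
   the compiler; the helper name is made up): both lanes are multiplied
   by the same constant 2^scale, which rs6000_scale_v2df materializes
   with real_powi and a splat into a V2DF register.  */
#if 0
static void
scale_v2df_model (double tgt[2], const double src[2], int scale)
{
  double factor = 1.0;
  for (int i = 0; i < (scale < 0 ? -scale : scale); i++)
    factor *= 2.0;
  if (scale < 0)
    factor = 1.0 / factor;
  tgt[0] = src[0] * factor;
  tgt[1] = src[1] * factor;
}
#endif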
/* Return an RTX representing where to find the function value of a
   function returning MODE.  */
static rtx
rs6000_complex_function_value (machine_mode mode)
{
  unsigned int regno;
  rtx r1, r2;
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);

  if (TARGET_FLOAT128_TYPE
      && (mode == KCmode
	  || (mode == TCmode && TARGET_IEEEQUAD)))
    regno = ALTIVEC_ARG_RETURN;

  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;

  else
    {
      regno = GP_ARG_RETURN;

      /* 32-bit is OK since it'll go in r3/r4.  */
      if (TARGET_32BIT && inner_bytes >= 4)
	return gen_rtx_REG (mode, regno);
    }

  if (inner_bytes >= 8)
    return gen_rtx_REG (mode, regno);

  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
			  const0_rtx);
  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
			  GEN_INT (inner_bytes));
  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}
/* Return an rtx describing a return value of MODE as a PARALLEL
   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
   stride REG_STRIDE.  */

static rtx
rs6000_parallel_return (machine_mode mode,
			int n_elts, machine_mode elt_mode,
			unsigned int regno, unsigned int reg_stride)
{
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));

  int i;
  for (i = 0; i < n_elts; i++)
    {
      rtx r = gen_rtx_REG (elt_mode, regno);
      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
      regno += reg_stride;
    }

  return par;
}
/* Target hook for TARGET_FUNCTION_VALUE.

   An integer value is in r3 and a floating-point value is in fp1,
   unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    switch (mode)
      {
      default:
	break;
      case E_DImode:
      case E_SCmode:
      case E_DCmode:
      case E_TCmode:
	int count = GET_MODE_SIZE (mode) / 4;
	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
      }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
	   && !FLOAT128_VECTOR_P (mode))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
static rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */
static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_ALTIVEC)
	pressure_classes[n++] = ALTIVEC_REGS;
      if (TARGET_HARD_FLOAT)
	pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
	      || constant_pool_empty_p ()
	    : true);
}
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
	offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
	     ? info->fixed_size + info->vars_size + info->parm_size
	     : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
/* Fill in sizes of registers used by unwinder.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}
/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      if (regno <= 31)
	return regno;
      if (FP_REGNO_P (regno))
	return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
	return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
	return 65;
      if (regno == CTR_REGNO)
	return 66;
      if (regno == CA_REGNO)
	return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
	 to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
	return 64;
      if (CR_REGNO_P (regno))
	return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
	return 356;
      if (regno == VSCR_REGNO)
	return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
	return 111;
      if (regno == ARG_POINTER_REGNUM)
	return 67;
      if (regno == 64)
	return 64;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 111;
  if (regno == ARG_POINTER_REGNUM)
    return 67;
  if (regno == 64)
    return 64;

  gcc_unreachable ();
}
/* target hook eh_return_filter_mode */
static scalar_int_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}
/* Target hook for translate_mode_attribute.  */
static machine_mode
rs6000_translate_mode_attribute (machine_mode mode)
{
  if ((FLOAT128_IEEE_P (mode)
       && ieee128_float_type_node == long_double_type_node)
      || (FLOAT128_IBM_P (mode)
	  && ibm128_float_type_node == long_double_type_node))
    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
  return mode;
}
/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Target hook for vector_mode_supported_p.  */
static bool
rs6000_vector_mode_supported_p (machine_mode mode)
{
  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
     128-bit, the compiler might try to widen IEEE 128-bit to IBM
     double-double.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
    return true;
  else
    return false;
}
/* Target hook for floatn_mode.  */
static opt_scalar_float_mode
rs6000_floatn_mode (int n, bool extended)
{
  if (extended)
    {
      switch (n)
	{
	case 32:
	  return DFmode;

	case 64:
	  if (TARGET_FLOAT128_TYPE)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return opt_scalar_float_mode ();

	case 128:
	  return opt_scalar_float_mode ();

	default:
	  /* Those are the only valid _FloatNx types.  */
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (n)
	{
	case 32:
	  return SFmode;

	case 64:
	  return DFmode;

	case 128:
	  if (TARGET_FLOAT128_TYPE)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return opt_scalar_float_mode ();

	default:
	  return opt_scalar_float_mode ();
	}
    }
}

/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
        return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
         If/when the default for -mabi=ieeelongdouble is changed, and we want
         to support __ibm128 constants in legacy library code, we may need to
         re-evaluate this decision.  Currently, c-lex.c only supports 'w' and
         'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
         __float80 constants.  */
    }

  return VOIDmode;
}
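
/* Illustrative use of the 'q'/'Q' suffix handled above (user-facing code,
   a sketch, not part of this file): with -mfloat128, the literal suffix
   selects the IEEE 128-bit mode chosen here.

     __float128 pi = 3.14159265358979323846264338327950288q;

   Whether that is TFmode or KFmode depends on whether long double is
   itself IEEE 128-bit.  */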

/* Target hook for invalid_arg_for_unprototyped_fn. */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl,
                                 const_tree val)
{
  return (!rs6000_darwin64_abi
          && typelist == 0
          && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
          && (funcdecl == NULL_TREE
              || (TREE_CODE (funcdecl) == FUNCTION_DECL
                  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
         ? N_("AltiVec argument passed to unprototyped function")
         : NULL;
}

/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
         ? default_hidden_stack_protect_fail ()
         : default_external_stack_protect_fail ();
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
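
/* For reference, the resulting offsets are 1 << 41 = 0x20000000000 for
   64-bit and 1 << 29 = 0x20000000 for 32-bit.  With the default ASan
   mapping of shadow = (addr >> 3) + offset, a 64-bit address A therefore
   has its shadow byte at (A >> 3) + 0x20000000000.  */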

/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;             /* option name */
  HOST_WIDE_INT mask;           /* mask to set */
  bool invert;                  /* invert sense of mask */
  bool valid_target;            /* option is a target option */
};

static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
  { "altivec",                  OPTION_MASK_ALTIVEC,            false, true  },
  { "block-ops-unaligned-vsx",  OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
                                                                false, true  },
  { "block-ops-vector-pair",    OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
                                                                false, true  },
  { "cmpb",                     OPTION_MASK_CMPB,               false, true  },
  { "crypto",                   OPTION_MASK_CRYPTO,             false, true  },
  { "direct-move",              OPTION_MASK_DIRECT_MOVE,        false, true  },
  { "dlmzb",                    OPTION_MASK_DLMZB,              false, true  },
  { "efficient-unaligned-vsx",  OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
                                                                false, true  },
  { "float128",                 OPTION_MASK_FLOAT128_KEYWORD,   false, true  },
  { "float128-hardware",        OPTION_MASK_FLOAT128_HW,        false, true  },
  { "fprnd",                    OPTION_MASK_FPRND,              false, true  },
  { "power10",                  OPTION_MASK_POWER10,            false, true  },
  { "hard-dfp",                 OPTION_MASK_DFP,                false, true  },
  { "htm",                      OPTION_MASK_HTM,                false, true  },
  { "isel",                     OPTION_MASK_ISEL,               false, true  },
  { "mfcrf",                    OPTION_MASK_MFCRF,              false, true  },
  { "mfpgpr",                   0,                              false, true  },
  { "mma",                      OPTION_MASK_MMA,                false, true  },
  { "modulo",                   OPTION_MASK_MODULO,             false, true  },
  { "mulhw",                    OPTION_MASK_MULHW,              false, true  },
  { "multiple",                 OPTION_MASK_MULTIPLE,           false, true  },
  { "pcrel",                    OPTION_MASK_PCREL,              false, true  },
  { "popcntb",                  OPTION_MASK_POPCNTB,            false, true  },
  { "popcntd",                  OPTION_MASK_POPCNTD,            false, true  },
  { "power8-fusion",            OPTION_MASK_P8_FUSION,          false, true  },
  { "power8-fusion-sign",       OPTION_MASK_P8_FUSION_SIGN,     false, true  },
  { "power8-vector",            OPTION_MASK_P8_VECTOR,          false, true  },
  { "power9-minmax",            OPTION_MASK_P9_MINMAX,          false, true  },
  { "power9-misc",              OPTION_MASK_P9_MISC,            false, true  },
  { "power9-vector",            OPTION_MASK_P9_VECTOR,          false, true  },
  { "powerpc-gfxopt",           OPTION_MASK_PPC_GFXOPT,         false, true  },
  { "powerpc-gpopt",            OPTION_MASK_PPC_GPOPT,          false, true  },
  { "prefixed",                 OPTION_MASK_PREFIXED,           false, true  },
  { "quad-memory",              OPTION_MASK_QUAD_MEMORY,        false, true  },
  { "quad-memory-atomic",       OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true  },
  { "recip-precision",          OPTION_MASK_RECIP_PRECISION,    false, true  },
  { "save-toc-indirect",        OPTION_MASK_SAVE_TOC_INDIRECT,  false, true  },
  { "string",                   0,                              false, true  },
  { "update",                   OPTION_MASK_NO_UPDATE,          true , true  },
  { "vsx",                      OPTION_MASK_VSX,                false, true  },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
  { "aix64",                    OPTION_MASK_64BIT,              false, false },
  { "aix32",                    OPTION_MASK_64BIT,              true,  false },
#else
  { "64",                       OPTION_MASK_64BIT,              false, false },
  { "32",                       OPTION_MASK_64BIT,              true,  false },
#endif
#endif
#ifdef OPTION_MASK_EABI
  { "eabi",                     OPTION_MASK_EABI,               false, false },
#endif
#ifdef OPTION_MASK_LITTLE_ENDIAN
  { "little",                   OPTION_MASK_LITTLE_ENDIAN,      false, false },
  { "big",                      OPTION_MASK_LITTLE_ENDIAN,      true,  false },
#endif
#ifdef OPTION_MASK_RELOCATABLE
  { "relocatable",              OPTION_MASK_RELOCATABLE,        false, false },
#endif
#ifdef OPTION_MASK_STRICT_ALIGN
  { "strict-align",             OPTION_MASK_STRICT_ALIGN,       false, false },
#endif
  { "soft-float",               OPTION_MASK_SOFT_FLOAT,         false, false },
  { "string",                   0,                              false, false },
};

/* Builtin mask mapping for printing the flags.  */
static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{
  { "altivec",          RS6000_BTM_ALTIVEC,     false, false },
  { "vsx",              RS6000_BTM_VSX,         false, false },
  { "fre",              RS6000_BTM_FRE,         false, false },
  { "fres",             RS6000_BTM_FRES,        false, false },
  { "frsqrte",          RS6000_BTM_FRSQRTE,     false, false },
  { "frsqrtes",         RS6000_BTM_FRSQRTES,    false, false },
  { "popcntd",          RS6000_BTM_POPCNTD,     false, false },
  { "cell",             RS6000_BTM_CELL,        false, false },
  { "power8-vector",    RS6000_BTM_P8_VECTOR,   false, false },
  { "power9-vector",    RS6000_BTM_P9_VECTOR,   false, false },
  { "power9-misc",      RS6000_BTM_P9_MISC,     false, false },
  { "crypto",           RS6000_BTM_CRYPTO,      false, false },
  { "htm",              RS6000_BTM_HTM,         false, false },
  { "hard-dfp",         RS6000_BTM_DFP,         false, false },
  { "hard-float",       RS6000_BTM_HARD_FLOAT,  false, false },
  { "long-double-128",  RS6000_BTM_LDBL128,     false, false },
  { "powerpc64",        RS6000_BTM_POWERPC64,   false, false },
  { "float128",         RS6000_BTM_FLOAT128,    false, false },
  { "float128-hw",      RS6000_BTM_FLOAT128_HW, false, false },
  { "mma",              RS6000_BTM_MMA,         false, false },
  { "power10",          RS6000_BTM_P10,         false, false },
};

/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;             /* option name */
  size_t global_offset;         /* offset of the option in global_options.  */
  size_t target_offset;         /* offset of the option in target options.  */
};

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "speculate-indirect-jumps",
    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
};
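
/* Example of the syntax these two tables accept (user code, for
   illustration only):

     __attribute__ ((__target__ ("no-vsx,avoid-indexed-addresses")))
     void scalar_code (double *dst, const double *src, int n);

     #pragma GCC target ("cpu=power9,power9-vector")

   Names from rs6000_opt_masks toggle bits in rs6000_isa_flags; names
   from rs6000_opt_vars store !invert into the int option variable at
   the recorded offset in global_options.  */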

/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
        {
          bool error_p = false;
          bool not_valid_p = false;
          const char *cpu_opt = NULL;

          p = NULL;
          if (strncmp (q, "cpu=", 4) == 0)
            {
              int cpu_index = rs6000_cpu_name_lookup (q+4);
              if (cpu_index >= 0)
                rs6000_cpu_index = cpu_index;
              else
                {
                  error_p = true;
                  cpu_opt = q+4;
                }
            }
          else if (strncmp (q, "tune=", 5) == 0)
            {
              int tune_index = rs6000_cpu_name_lookup (q+5);
              if (tune_index >= 0)
                rs6000_tune_index = tune_index;
              else
                {
                  error_p = true;
                  cpu_opt = q+5;
                }
            }
          else
            {
              size_t i;
              bool invert = false;
              char *r = q;

              error_p = true;
              if (strncmp (r, "no-", 3) == 0)
                {
                  invert = true;
                  r += 3;
                }

              for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
                if (strcmp (r, rs6000_opt_masks[i].name) == 0)
                  {
                    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

                    if (!rs6000_opt_masks[i].valid_target)
                      not_valid_p = true;
                    else
                      {
                        error_p = false;
                        rs6000_isa_flags_explicit |= mask;

                        /* VSX needs altivec, so -mvsx automagically sets
                           altivec and disables -mavoid-indexed-addresses.  */
                        if (!invert)
                          {
                            if (mask == OPTION_MASK_VSX)
                              {
                                mask |= OPTION_MASK_ALTIVEC;
                                TARGET_AVOID_XFORM = 0;
                              }
                          }

                        if (rs6000_opt_masks[i].invert)
                          invert = !invert;

                        if (invert)
                          rs6000_isa_flags &= ~mask;
                        else
                          rs6000_isa_flags |= mask;
                      }
                    break;
                  }

              if (error_p && !not_valid_p)
                {
                  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
                    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
                      {
                        size_t j = rs6000_opt_vars[i].global_offset;
                        *((int *) ((char *)&global_options + j)) = !invert;
                        error_p = false;
                        not_valid_p = false;
                        break;
                      }
                }
            }

          if (error_p)
            {
              const char *eprefix, *esuffix;

              ret = false;
              if (attr_p)
                {
                  eprefix = "__attribute__((__target__(";
                  esuffix = ")))";
                }
              else
                {
                  eprefix = "#pragma GCC target ";
                  esuffix = "";
                }

              if (cpu_opt)
                error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
                       q, esuffix);
              else if (not_valid_p)
                error ("%s%qs%s is not allowed", eprefix, q, esuffix);
              else
                error ("%s%qs%s is invalid", eprefix, q, esuffix);
            }
        }
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
        {
          tree value = TREE_VALUE (args);
          if (value)
            {
              bool ret2 = rs6000_inner_target_options (value, attr_p);
              if (!ret2)
                ret = false;
            }
          args = TREE_CHAIN (args);
        }
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}
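
/* For instance, parsing "cpu=power9,no-vsx" above proceeds as follows:
   strtok splits the string at commas; "cpu=power9" sets rs6000_cpu_index
   via rs6000_cpu_name_lookup; "no-vsx" strips the "no-" prefix, matches
   the "vsx" entry in rs6000_opt_masks, and clears OPTION_MASK_VSX in
   rs6000_isa_flags while recording it in rs6000_isa_flags_explicit.  */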

/* Print out the target options as a list for -mdebug=target.  */
static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
        {
          p = NULL;
          fprintf (stderr, "%s\"%s\"", prefix, q);
          prefix = ", ";
        }
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
        {
          tree value = TREE_VALUE (args);
          if (value)
            {
              rs6000_debug_target_options (value, prefix);
              prefix = ", ";
            }
          args = TREE_CHAIN (args);
        }
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();
}

/* Hook to validate attribute((target("..."))).  */

static bool
rs6000_valid_attribute_p (tree fndecl,
                          tree ARG_UNUSED (name),
                          tree args,
                          int flags)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree func_optimize;

  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  if (TARGET_DEBUG_TARGET)
    {
      tree tname = DECL_NAME (fndecl);
      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
      if (tname)
        fprintf (stderr, "function: %.*s\n",
                 (int) IDENTIFIER_LENGTH (tname),
                 IDENTIFIER_POINTER (tname));
      else
        fprintf (stderr, "function: unknown\n");

      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (flags)
        fprintf (stderr, "flags: 0x%x\n", flags);

      fprintf (stderr, "--------------------\n");
    }

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  old_optimize = build_optimization_node (&global_options,
                                          &global_options_set);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
                             TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options, &global_options_set);
  rs6000_cpu_index = rs6000_tune_index = -1;
  ret = rs6000_inner_target_options (args, true);

  /* Set up any additional state.  */
  if (ret)
    {
      ret = rs6000_option_override_internal (false);
      new_target = build_target_option_node (&global_options,
                                             &global_options_set);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options,
                                          &global_options_set);

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
        DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &global_options_set, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
                             TREE_OPTIMIZATION (old_optimize));

  return ret;
}

/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  */

static bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options,
                                             &global_options_set);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
  HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
        {
          fprintf (stderr, "pop_target:\n");
          debug_tree (pop_target);
        }
      else
        fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      cur_tree = ((pop_target)
                  ? pop_target
                  : target_option_default_node);
      cl_target_option_restore (&global_options, &global_options_set,
                                TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
          || !rs6000_option_override_internal (false)
          || (cur_tree = build_target_option_node (&global_options,
                                                   &global_options_set))
             == NULL_TREE)
        {
          if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
            fprintf (stderr, "invalid pragma\n");

          return false;
        }
    }

  target_option_current_node = cur_tree;
  rs6000_activate_target_options (target_option_current_node);

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_bumask = prev_opt->x_rs6000_builtin_mask;
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;
      cur_bumask  = cur_opt->x_rs6000_builtin_mask;

      diff_bumask = (prev_bumask ^ cur_bumask);
      diff_flags  = (prev_flags ^ cur_flags);

      if ((diff_flags != 0) || (diff_bumask != 0))
        {
          /* Delete old macros.  */
          rs6000_target_modify_macros_ptr (false,
                                           prev_flags & diff_flags,
                                           prev_bumask & diff_bumask);

          /* Define new macros.  */
          rs6000_target_modify_macros_ptr (true,
                                           cur_flags & diff_flags,
                                           cur_bumask & diff_bumask);
        }
    }

  return true;
}
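
/* Typical use that exercises this hook (user code, illustrative):

     #pragma GCC push_options
     #pragma GCC target ("vsx")
     ...code that may test __VSX__...
     #pragma GCC pop_options

   The pop is delivered to this hook as ARGS == NULL with POP_TARGET set,
   and the macro diffing above undefines __VSX__ again if the popped
   state does not include VSX.  */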

/* Remember the last target of rs6000_set_current_function.  */
static GTY(()) tree rs6000_previous_fndecl;

/* Restore target's globals from NEW_TREE and invalidate the
   rs6000_previous_fndecl cache.  */

void
rs6000_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, &global_options_set,
                            TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  rs6000_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
rs6000_set_current_function (tree fndecl)
{
  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
        fprintf (stderr, ", fndecl %s (%p)",
                 (DECL_NAME (fndecl)
                  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
                  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
        fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == rs6000_previous_fndecl)
    return;

  tree old_tree;
  if (rs6000_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
  else
    old_tree = target_option_default_node;

  tree new_tree;
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
        new_tree = target_option_current_node;
      else
        new_tree = NULL_TREE;
    }
  else
    {
      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
      if (new_tree == NULL_TREE)
        new_tree = target_option_default_node;
    }

  if (TARGET_DEBUG_TARGET)
    {
      if (new_tree)
        {
          fprintf (stderr, "\nnew fndecl target specific options:\n");
          debug_tree (new_tree);
        }

      if (old_tree)
        {
          fprintf (stderr, "\nold fndecl target specific options:\n");
          debug_tree (old_tree);
        }

      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
        fprintf (stderr, "--------------------\n");
    }

  if (new_tree && old_tree != new_tree)
    rs6000_activate_target_options (new_tree);

  if (fndecl)
    rs6000_previous_fndecl = fndecl;
}

/* Save the current options */

static void
rs6000_function_specific_save (struct cl_target_option *ptr,
                               struct gcc_options *opts,
                               struct gcc_options * /* opts_set */)
{
  ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
  ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
}

/* Restore the current options */

static void
rs6000_function_specific_restore (struct gcc_options *opts,
                                  struct gcc_options * /* opts_set */,
                                  struct cl_target_option *ptr)
{
  opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
  opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
  (void) rs6000_option_override_internal (false);
}

/* Print the current options */

static void
rs6000_function_specific_print (FILE *file, int indent,
                                struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
                            ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
                            ptr->x_rs6000_isa_flags_explicit);
}

/* Helper function to print the current isa or misc options on a line.  */

static void
rs6000_print_options_internal (FILE *file,
                               int indent,
                               const char *string,
                               HOST_WIDE_INT flags,
                               const char *prefix,
                               const struct rs6000_opt_mask *opts,
                               size_t num_elements)
{
  size_t i;
  size_t start_column = 0;
  size_t cur_column;
  size_t max_column = 120;
  size_t prefix_len = strlen (prefix);
  size_t comma_len = 0;
  const char *comma = "";

  if (indent)
    start_column += fprintf (file, "%*s", indent, "");

  if (!flags)
    {
      fprintf (stderr, DEBUG_FMT_S, string, "<none>");
      return;
    }

  start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);

  /* Print the various mask options.  */
  cur_column = start_column;
  for (i = 0; i < num_elements; i++)
    {
      bool invert = opts[i].invert;
      const char *name = opts[i].name;
      const char *no_str = "";
      HOST_WIDE_INT mask = opts[i].mask;
      size_t len = comma_len + prefix_len + strlen (name);

      if (!invert)
        {
          if ((flags & mask) == 0)
            {
              no_str = "no-";
              len += strlen ("no-");
            }

          flags &= ~mask;
        }

      else
        {
          if ((flags & mask) != 0)
            {
              no_str = "no-";
              len += strlen ("no-");
            }

          flags |= mask;
        }

      cur_column += len;
      if (cur_column > max_column)
        {
          fprintf (stderr, ", \\\n%*s", (int)start_column, "");
          cur_column = start_column + len;
          comma = "";
        }

      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
      comma = ", ";
      comma_len = strlen (", ");
    }

  fputs ("\n", file);
}

/* Helper function to print the current isa options on a line.  */

static void
rs6000_print_isa_options (FILE *file, int indent, const char *string,
                          HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "-m",
                                 &rs6000_opt_masks[0],
                                 ARRAY_SIZE (rs6000_opt_masks));
}

static void
rs6000_print_builtin_options (FILE *file, int indent, const char *string,
                              HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "",
                                 &rs6000_builtin_mask_names[0],
                                 ARRAY_SIZE (rs6000_builtin_mask_names));
}
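
/* The output is one line per flag word; a -mdebug=target dump might show
   something like the following (a sketch; exact layout depends on the
   DEBUG_FMT_WX format string):

     Isa options set = 0x...: -maltivec, -mvsx, -mcmpb, -mno-htm, ...

   with long lines wrapped at column 120 by the code above.  */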

/* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
   2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
   -mupper-regs-df, etc.).

   If the user used -mno-power8-vector, we need to turn off all of the implicit
   ISA 2.07 and 3.0 options that relate to the vector unit.

   If the user used -mno-power9-vector, we need to turn off all of the implicit
   ISA 3.0 options that relate to the vector unit.

   This function does not handle explicit options such as the user specifying
   -mdirect-move.  These are handled in rs6000_option_override_internal, and
   the appropriate error is given if needed.

   We return a mask of all of the implicit options that should not be enabled
   by default.  */

static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void)
{
  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
  size_t i, j;

  static const struct {
    const HOST_WIDE_INT no_flag;        /* flag explicitly turned off.  */
    const HOST_WIDE_INT dep_flags;      /* flags that depend on this option.  */
    const char *const name;             /* name of the switch.  */
  } flags[] = {
    { OPTION_MASK_POWER10,      OTHER_POWER10_MASKS,    "power10"       },
    { OPTION_MASK_P9_VECTOR,    OTHER_P9_VECTOR_MASKS,  "power9-vector" },
    { OPTION_MASK_P8_VECTOR,    OTHER_P8_VECTOR_MASKS,  "power8-vector" },
    { OPTION_MASK_VSX,          OTHER_VSX_VECTOR_MASKS, "vsx"           },
    { OPTION_MASK_ALTIVEC,      OTHER_ALTIVEC_MASKS,    "altivec"       },
  };

  for (i = 0; i < ARRAY_SIZE (flags); i++)
    {
      HOST_WIDE_INT no_flag = flags[i].no_flag;

      if ((rs6000_isa_flags & no_flag) == 0
          && (rs6000_isa_flags_explicit & no_flag) != 0)
        {
          HOST_WIDE_INT dep_flags = flags[i].dep_flags;
          HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
                                     & rs6000_isa_flags
                                     & dep_flags);

          if (set_flags)
            {
              for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
                if ((set_flags & rs6000_opt_masks[j].mask) != 0)
                  {
                    set_flags &= ~rs6000_opt_masks[j].mask;
                    error ("%<-mno-%s%> turns off %<-m%s%>",
                           flags[i].name,
                           rs6000_opt_masks[j].name);
                  }

              gcc_assert (!set_flags);
            }

          rs6000_isa_flags &= ~dep_flags;
          ignore_masks |= no_flag | dep_flags;
        }
    }

  return ignore_masks;
}
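
/* Example of the diagnostic this produces: compiling with
   "-mvsx -mno-altivec" makes VSX explicitly on and AltiVec explicitly
   off, so the loop above reports

     error: '-mno-altivec' turns off '-mvsx'

   and then clears every AltiVec-dependent flag in rs6000_isa_flags.  */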

/* Helper function for printing the function name when debugging.  */

static const char *
get_decl_name (tree fn)
{
  tree name;

  if (!fn)
    return "<null>";

  name = DECL_NAME (fn);
  if (!name)
    return "<no-name>";

  return IDENTIFIER_POINTER (name);
}

/* Return the clone id of the target we are compiling code for in a target
   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
   the priority list for the target clones (ordered from lowest to
   highest).  */

static int
rs6000_clone_priority (tree fndecl)
{
  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  HOST_WIDE_INT isa_masks;
  int ret = CLONE_DEFAULT;
  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
  const char *attrs_str = NULL;

  attrs = TREE_VALUE (TREE_VALUE (attrs));
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  Return the ISA needed for the
     function if it is not the default.  */
  if (strcmp (attrs_str, "default") != 0)
    {
      if (fn_opts == NULL_TREE)
        fn_opts = target_option_default_node;

      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
        isa_masks = rs6000_isa_flags;
      else
        isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;

      for (ret = CLONE_MAX - 1; ret != 0; ret--)
        if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
          break;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
             get_decl_name (fndecl), ret);

  return ret;
}
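
/* The attribute that drives this priority ordering (user code,
   illustrative only):

     __attribute__ ((target_clones ("cpu=power9", "cpu=power8", "default")))
     double dot (const double *x, const double *y, int n);

   Each non-default clone gets the highest CLONE_* index whose isa_mask
   intersects the clone's ISA flags, and the dispatcher tests candidates
   from highest priority down.  */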

/* This compares the priority of target features in function DECL1 and DECL2.
   It returns positive value if DECL1 is higher priority, negative value if
   DECL2 is higher priority and 0 if they are the same.  Note, priorities are
   ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0).  */

static int
rs6000_compare_version_priority (tree decl1, tree decl2)
{
  int priority1 = rs6000_clone_priority (decl1);
  int priority2 = rs6000_clone_priority (decl2);
  int ret = priority1 - priority2;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
             get_decl_name (decl1), get_decl_name (decl2), ret);

  return ret;
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
rs6000_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;
  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
             get_decl_name (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;

  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      const tree decl2 = default_version_info->this_node->decl;
      if (is_function_default_version (decl2))
        break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
        default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
  error_at (DECL_SOURCE_LOCATION (default_node->decl),
            "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
            "exports hardware capability bits");
#else
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
        = dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
        {
          it_v->dispatcher_resolver = dispatch_decl;
          it_v = it_v->next;
        }
    }
  else
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
                "multiversioning needs ifunc which is not supported "
                "on this target");
    }
#endif

  return dispatch_decl;
}

/* Make the resolver function decl to dispatch the versions of a multi-
   versioned function, DEFAULT_DECL.  Create an empty basic block in the
   resolver and store the pointer in EMPTY_BB.  Return the decl of the resolver
   function.  */

static tree
make_resolver_func (const tree default_decl,
                    const tree dispatch_decl,
                    basic_block *empty_bb)
{
  /* Make the resolver function static.  The resolver function returns
     void *.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
  tree type = build_function_type_list (ptr_type_node, NULL_TREE);
  tree decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
         versioned function will put out a resolver.  Ensure it
         is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (dispatch_decl) = 0;

  /* Build result decl and add to function_decl.  */
  tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
                                           profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  cgraph_node::create_same_body_alias (dispatch_decl, decl);

  return decl;
}
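
/* The net effect, expressed as the equivalent user-level declarations
   (a sketch of GNU ifunc semantics, not code from this file):

     static void *foo_resolver (void);
     void foo (void) __attribute__ ((ifunc ("foo_resolver")));

   The dynamic loader runs foo_resolver once, early, and binds foo's
   GOT/PLT entry to whatever function address it returns.  */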

/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
   return a pointer to VERSION_DECL if we are running on a machine that
   supports the index CLONE_ISA hardware architecture bits.  This function will
   be called during version dispatch to decide which function version to
   execute.  It returns the basic block at the end, to which more conditions
   can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
                     int clone_isa, basic_block new_bb)
{
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gimple_seq gseq = bb_seq (new_bb);

  tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
                              build_fold_addr_expr (version_decl));
  tree result_var = create_tmp_var (ptr_type_node);
  gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
  gimple *return_stmt = gimple_build_return (result_var);

  if (clone_isa == CLONE_DEFAULT)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  tree bool_zero = build_int_cst (bool_int_type_node, 0);
  tree cond_var = create_tmp_var (bool_int_type_node);
  tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BUILTIN_CPU_SUPPORTS];
  const char *arg_str = rs6000_clone_map[clone_isa].name;
  tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
  gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
  gimple_call_set_lhs (call_cond_stmt, cond_var);

  gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (call_cond_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, call_cond_stmt);

  gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
                                            NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  basic_block bb1 = new_bb;
  edge e12 = split_block (bb1, if_else_stmt);
  basic_block bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  edge e23 = split_block (bb2, return_stmt);
  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  basic_block bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();
  return bb3;
}
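
/* Shape of the resolver these blocks build, written as equivalent C
   (a sketch; the actual body is emitted directly as GIMPLE, and the
   clone names here are hypothetical):

     static void *dot_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) dot_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
         return (void *) dot_power8;
       return (void *) dot_default;
     }
*/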

/* This function generates the dispatch function for multi-versioned functions.
   DISPATCH_DECL is the function which will contain the dispatch logic.
   FNDECLS are the function choices for dispatch, and is a tree chain.
   EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
   code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
                            void *fndecls_p,
                            basic_block *empty_bb)
{
  int ix;
  tree ele;
  vec<tree> *fndecls;
  tree clones[CLONE_MAX];

  if (TARGET_DEBUG_TARGET)
    fputs ("dispatch_function_versions, top\n", stderr);

  gcc_assert (dispatch_decl != NULL
              && fndecls_p != NULL
              && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  gcc_assert (fndecls->length () >= 2);

  /* The first version in the vector is the default decl.  */
  memset ((void *) clones, '\0', sizeof (clones));
  clones[CLONE_DEFAULT] = (*fndecls)[0];

  /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
     on the PowerPC (on the x86_64, it is not a NOP).  The builtin function
     __builtin_cpu_supports ensures that the TOC fields are setup by requiring
     a recent glibc.  If we ever need to call __builtin_cpu_init, we would need
     to insert the code here to do the call.  */

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      int priority = rs6000_clone_priority (ele);
      if (!clones[priority])
        clones[priority] = ele;
    }

  for (ix = CLONE_MAX - 1; ix >= 0; ix--)
    if (clones[ix])
      {
        if (TARGET_DEBUG_TARGET)
          fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
                   ix, get_decl_name (clones[ix]));

        *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
                                         *empty_bb);
      }

  return 0;
}

/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
rs6000_generate_version_dispatcher_body (void *node_p)
{
  tree resolver;
  basic_block empty_bb;
  struct cgraph_node *node = (cgraph_node *) node_p;
  struct cgraph_function_version_info *ninfo = node->function_version ();

  if (ninfo->dispatcher_resolver)
    return ninfo->dispatcher_resolver;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  /* The first version in the chain corresponds to the default version.  */
  ninfo->dispatcher_resolver = resolver
    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
             get_decl_name (resolver));

  push_cfun (DECL_STRUCT_FUNCTION (resolver));
  auto_vec<tree, 2> fn_ver_vec;

  for (struct cgraph_function_version_info *vinfo = ninfo->next;
       vinfo;
       vinfo = vinfo->next)
    {
      struct cgraph_node *version = vinfo->this_node;
      /* Check for virtual functions here again, as by this time it should
         have been determined if this function needs a vtable index or
         not.  This happens for methods in derived classes that override
         virtual methods in base classes but are not explicitly marked as
         virtual.  */
      if (DECL_VINDEX (version->decl))
        sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (version->decl);
    }

  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver;
}

/* Hook to determine if one function can safely inline another.  */

static bool
rs6000_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If the callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  else
    {
      HOST_WIDE_INT caller_isa;
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
      HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
      HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;

      /* If the caller has option attributes, then use them.
         Otherwise, use the command line options.  */
      if (caller_tree)
        caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
      else
        caller_isa = rs6000_isa_flags;

      /* The callee's options must be a subset of the caller's options, i.e.
         a vsx function may inline an altivec function, but a no-vsx function
         must not inline a vsx function.  However, for those options that the
         callee has explicitly enabled or disabled, then we must enforce that
         the callee's and caller's options match exactly; see PR70010.  */
      if (((caller_isa & callee_isa) == callee_isa)
          && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
        ret = true;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
             get_decl_name (caller), get_decl_name (callee),
             (ret ? "can" : "cannot"));

  return ret;
}

/* Allocate a stack temp and fixup the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */

rtx
rs6000_allocate_stack_temp (machine_mode mode,
                            bool offsettable_p,
                            bool reg_reg_p)
{
  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  rtx addr = XEXP (stack, 0);
  int strict_p = reload_completed;

  if (!legitimate_indirect_address_p (addr, strict_p))
    {
      if (offsettable_p
          && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
        stack = replace_equiv_address (stack, copy_addr_to_reg (addr));

      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
        stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
    }

  return stack;
}

/* Given a memory reference, if it is not a reg or reg+reg addressing,
   convert to such a form to deal with memory reference instructions
   like STFIWX and LDBRX that only take reg+reg addressing.  */

rtx
rs6000_force_indexed_or_indirect_mem (rtx x)
{
  machine_mode mode = GET_MODE (x);

  gcc_assert (MEM_P (x));
  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
    {
      rtx addr = XEXP (x, 0);
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx reg = XEXP (addr, 0);
          HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
          rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
          gcc_assert (REG_P (reg));
          emit_insn (gen_add3_insn (reg, reg, size_rtx));
          addr = reg;
        }
      else if (GET_CODE (addr) == PRE_MODIFY)
        {
          rtx reg = XEXP (addr, 0);
          rtx expr = XEXP (addr, 1);
          gcc_assert (REG_P (reg));
          gcc_assert (GET_CODE (expr) == PLUS);
          emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
          addr = reg;
        }

      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0);
          rtx op1 = XEXP (addr, 1);
          op0 = force_reg (Pmode, op0);
          op1 = force_reg (Pmode, op1);
          x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
        }
      else
        x = replace_equiv_address (x, force_reg (Pmode, addr));
    }

  return x;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable, most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  if (CONST_DOUBLE_P (x))
    return easy_fp_constant (x, mode);

  if (GET_CODE (x) == CONST_VECTOR)
    return easy_vector_constant (x, mode);

  return true;
}

/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
        {
          rtx patt = PATTERN (last);

          if (GET_CODE (patt) == SET)
            {
              rtx lhs = XEXP (patt, 0);

              if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
                return true;
            }
        }
    }
  return false;
}

/* Expand code to perform a call under the AIX or ELFv2 ABI.  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;
  bool is_pltseq_longcall;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  is_pltseq_longcall = false;
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      if (TARGET_PLTSEQ)
        is_pltseq_longcall = true;
    }

  /* Handle indirect calls.  */
  if (!SYMBOL_REF_P (func)
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
    {
      if (!rs6000_pcrel_p ())
        {
          /* Save the TOC into its reserved slot before the call,
             and prepare to restore it after the call.  */
          rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
          rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
                                                 gen_rtvec (1, stack_toc_offset),
                                                 UNSPEC_TOCSLOT);
          toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

          /* Can we optimize saving the TOC in the prologue or
             do we need to do it at every call?  */
          if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
            cfun->machine->save_toc_in_prologue = true;
          else
            {
              rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
              rtx stack_toc_mem = gen_frame_mem (Pmode,
                                                 gen_rtx_PLUS (Pmode, stack_ptr,
                                                               stack_toc_offset));
              MEM_VOLATILE_P (stack_toc_mem) = 1;
              if (is_pltseq_longcall)
                {
                  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
                  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
                  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
                }
              else
                emit_move_insn (stack_toc_mem, toc_reg);
            }
        }

      if (DEFAULT_ABI == ABI_ELFv2)
        {
          /* A function pointer in the ELFv2 ABI is just a plain address, but
             the ABI requires it to be loaded into r12 before the call.  */
          func_addr = gen_rtx_REG (Pmode, 12);
          emit_move_insn (func_addr, func);
          abi_reg = func_addr;
          /* Indirect calls via CTR are strongly preferred over indirect
             calls via LR, so move the address there.  Needed to mark
             this insn for linker plt sequence editing too.  */
          func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
          if (is_pltseq_longcall)
            {
              rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
              rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
              emit_insn (gen_rtx_SET (func_addr, mark_func));
              v = gen_rtvec (2, func_addr, func_desc);
              func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
            }
          else
            emit_move_insn (func_addr, abi_reg);
        }
      else
        {
          /* A function pointer under AIX is a pointer to a data area whose
             first word contains the actual address of the function, whose
             second word contains a pointer to its TOC, and whose third word
             contains a value to place in the static chain register (r11).
             Note that if we load the static chain, our "trampoline" need
             not have any executable code.  */
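
          /* For illustration, that data area has the layout of this
             struct (a sketch, not a type the compiler actually uses):

                 struct aix_func_desc
                   {
                     void *code_addr;      first word: address of the code
                     void *toc;            second word: callee's TOC pointer
                     void *static_chain;   third word: loaded into r11
                   };
          */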

          /* Load up address of the actual function.  */
          func = force_reg (Pmode, func);
          func_addr = gen_reg_rtx (Pmode);
          emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));

          /* Indirect calls via CTR are strongly preferred over indirect
             calls via LR, so move the address there.  */
          rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
          emit_move_insn (ctr_reg, func_addr);
          func_addr = ctr_reg;

          /* Prepare to load the TOC of the called function.  Note that the
             TOC load must happen immediately before the actual call so
             that unwinding the TOC registers works correctly.  See the
             comment in frob_update_context.  */
          rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
          rtx func_toc_mem = gen_rtx_MEM (Pmode,
                                          gen_rtx_PLUS (Pmode, func,
                                                        func_toc_offset));
          toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

          /* If we have a static chain, load it up.  But, if the call was
             originally direct, the 3rd word has not been written since no
             trampoline has been built, so we ought not to load it, lest we
             override a static chain value.  */
          if (!(GET_CODE (func_desc) == SYMBOL_REF
                && SYMBOL_REF_FUNCTION_P (func_desc))
              && TARGET_POINTERS_TO_NESTED_FUNCTIONS
              && !chain_already_loaded (get_current_sequence ()->next->last))
            {
              rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
              rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
              rtx func_sc_mem = gen_rtx_MEM (Pmode,
                                             gen_rtx_PLUS (Pmode, func,
                                                           func_sc_offset));
              emit_move_insn (sc_reg, func_sc_mem);
            }
        }
    }
  else
    {
      /* No TOC register needed for calls from PC-relative callers.  */
      if (!rs6000_pcrel_p ())
        /* Direct calls use the TOC: for local calls, the callee will
           assume the TOC register is set; for non-local calls, the
           PLT stub needs the TOC register.  */
        abi_reg = toc_reg;
      func_addr = func;
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n_call = 2;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}

/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx call[2];
  rtx insn;
  rtx r12 = NULL_RTX;
  rtx func_addr = func_desc;

  gcc_assert (INTVAL (cookie) == 0);

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* For ELFv2, r12 and CTR need to hold the function address
     for an indirect call.  */
  if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
    {
      r12 = gen_rtx_REG (Pmode, 12);
      emit_move_insn (r12, func_desc);
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, r12);
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  if (!rs6000_pcrel_p ())
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (Pmode, TOC_REGNUM));

  /* Note use of r12.  */
  if (r12)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
}

/* Expand code to perform a call under the SYSV4 ABI.  */

void
rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[4];
  rtx insn;
  rtx abi_reg = NULL_RTX;
  int n;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
         PLT unspecs then func will be REG:r11.  If not, func will be
         a pseudo reg.  The inline PLT call sequence supports lazy
         linking (and longcalls to functions in dlopen'd libraries).
         The other style of longcalls don't.  The lazy linking entry
         to the dynamic symbol resolver requires r11 be the function
         address (as it is for linker generated PLT stubs).  Ensure
         r11 stays valid to the bctrl by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
        abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
         calls via LR, so move the address there.  That can't be left
         to reload because we want to mark every instruction in an
         inline PLT call sequence with a reloc, enabling the linker to
         edit the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
        {
          rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
          rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
          emit_insn (gen_rtx_SET (func_addr, mark_func));
          v = gen_rtvec (2, func_addr, func_desc);
          func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
        }
      else
        emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n = 2;
  if (TARGET_SECURE_PLT
      && flag_pic
      && GET_CODE (func_addr) == SYMBOL_REF
      && !SYMBOL_REF_LOCAL_P (func_addr))
    call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);

  call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}

/* Expand code to perform a sibling call under the SysV4 ABI.  */

void
rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  rtx abi_reg = NULL_RTX;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
         PLT unspecs then func will be REG:r11.  If not, func will be
         a pseudo reg.  The inline PLT call sequence supports lazy
         linking (and longcalls to functions in dlopen'd libraries).
         The other style of longcalls don't.  The lazy linking entry
         to the dynamic symbol resolver requires r11 be the function
         address (as it is for linker generated PLT stubs).  Ensure
         r11 stays valid to the bctr by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
        abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect sibcalls must go via CTR.  That can't be left to
         reload because we want to mark every instruction in an inline
         PLT call sequence with a reloc, enabling the linker to edit
         the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
        {
          rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
          rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
          emit_insn (gen_rtx_SET (func_addr, mark_func));
          v = gen_rtvec (2, func_addr, func_desc);
          func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
        }
      else
        emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, cookie);
  call[2] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}

#if TARGET_MACHO

/* Expand code to perform a call under the Darwin ABI.
   Modulo handling of mlongcall, this is much the same as sysv.
   if/when the longcall optimisation is removed, we could drop this
   code and use the sysv case (taking care to avoid the tls stuff).

   We can use this for sibcalls too, if needed.  */

static void
rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
                      rtx cookie, bool sibcall)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  int cookie_val = INTVAL (cookie);
  bool make_island = false;

  /* Handle longcall attributes, there are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it is an optimisation, we could just indirect as sysv does..
     ... however, backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
  if ((cookie_val & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      /* FIXME: the longcall opt should not hang off this flag, it is most
         likely incorrect for kernel-mode code-generation.  */
      if (darwin_symbol_stubs && TARGET_32BIT)
        make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
      else
        {
          /* The linker is capable of doing this, but the user explicitly
             asked for -mlongcall, so we'll do the 'normal' version.  */
          func = rs6000_longcall_ref (func_desc, NULL_RTX);
          cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
        }
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
         calls via LR, and are required for indirect sibcalls, so move
         the address there.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));

  if (sibcall)
    call[2] = simple_return_rtx;
  else
    call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  /* Now we have the debug info in the insn, we can set up the branch island
     if we're using one.  */
  if (make_island)
    {
      tree funname = get_identifier (XSTR (func_desc, 0));

      if (no_previous_def (funname))
        {
          rtx label_rtx = gen_label_rtx ();
          char *label_buf, temp_buf[256];
          ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
                                       CODE_LABEL_NUMBER (label_rtx));
          label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
          tree labelname = get_identifier (label_buf);
          add_compiler_branch_island (labelname, funname,
                                      insn_line ((const rtx_insn *) insn));
        }
    }
}
#endif

void
rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
                    rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
#else
  gcc_unreachable ();
#endif
}

void
rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
                       rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
#else
  gcc_unreachable ();
#endif
}
/* Return whether we should generate PC-relative code for FNDECL.  */

bool
rs6000_fndecl_pcrel_p (const_tree fndecl)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  struct cl_target_option *opts = target_opts_for_fn (fndecl);

  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}

/* Return whether we should generate PC-relative code for *FN.  */

bool
rs6000_function_pcrel_p (struct function *fn)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* Optimize usual case.  */
  if (fn == cfun)
    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	    && TARGET_CMODEL == CMODEL_MEDIUM);

  return rs6000_fndecl_pcrel_p (fn->decl);
}

/* Return whether we should generate PC-relative code for the current
   function.  */

bool
rs6000_pcrel_p (void)
{
  return (DEFAULT_ABI == ABI_ELFv2
	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}
/* Given an address (ADDR), a mode (MODE), and what the format of the
   non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
   for the address.  */

enum insn_form
address_to_insn_form (rtx addr,
		      machine_mode mode,
		      enum non_prefixed_form non_prefixed_format)
{
  /* Single register is easy.  */
  if (REG_P (addr) || SUBREG_P (addr))
    return INSN_FORM_BASE_REG;

  /* If the non prefixed instruction format doesn't support offset addressing,
     make sure only indexed addressing is allowed.

     We special case SDmode so that the register allocator does not try to move
     SDmode through GPR registers, but instead uses the 32-bit integer load and
     store instructions for the floating point registers.  */
  if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
    {
      if (GET_CODE (addr) != PLUS)
	return INSN_FORM_BAD;

      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      if (!REG_P (op0) && !SUBREG_P (op0))
	return INSN_FORM_BAD;

      if (!REG_P (op1) && !SUBREG_P (op1))
	return INSN_FORM_BAD;

      return INSN_FORM_X;
    }

  /* Deal with update forms.  */
  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
    return INSN_FORM_UPDATE;

  /* Handle PC-relative symbols and labels.  Check for both local and
     external symbols.  Assume labels are always local.  TLS symbols
     are not PC-relative for rs6000.  */
  if (TARGET_PCREL)
    {
      if (LABEL_REF_P (addr))
	return INSN_FORM_PCREL_LOCAL;

      if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
	{
	  if (!SYMBOL_REF_LOCAL_P (addr))
	    return INSN_FORM_PCREL_EXTERNAL;
	  else
	    return INSN_FORM_PCREL_LOCAL;
	}
    }

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
  if (GET_CODE (addr) == LO_SUM)
    return INSN_FORM_LO_SUM;

  /* Everything below must be an offset address of some form.  */
  if (GET_CODE (addr) != PLUS)
    return INSN_FORM_BAD;

  rtx op0 = XEXP (addr, 0);
  rtx op1 = XEXP (addr, 1);

  /* Check for indexed addresses.  */
  if (REG_P (op1) || SUBREG_P (op1))
    {
      if (REG_P (op0) || SUBREG_P (op0))
	return INSN_FORM_X;

      return INSN_FORM_BAD;
    }

  if (!CONST_INT_P (op1))
    return INSN_FORM_BAD;

  HOST_WIDE_INT offset = INTVAL (op1);
  if (!SIGNED_INTEGER_34BIT_P (offset))
    return INSN_FORM_BAD;

  /* Check for local and external PC-relative addresses.  Labels are always
     local.  TLS symbols are not PC-relative for rs6000.  */
  if (TARGET_PCREL)
    {
      if (LABEL_REF_P (op0))
	return INSN_FORM_PCREL_LOCAL;

      if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
	{
	  if (!SYMBOL_REF_LOCAL_P (op0))
	    return INSN_FORM_PCREL_EXTERNAL;
	  else
	    return INSN_FORM_PCREL_LOCAL;
	}
    }

  /* If it isn't PC-relative, the address must use a base register.  */
  if (!REG_P (op0) && !SUBREG_P (op0))
    return INSN_FORM_BAD;

  /* Large offsets must be prefixed.  */
  if (!SIGNED_INTEGER_16BIT_P (offset))
    {
      if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      return INSN_FORM_BAD;
    }

  /* We have a 16-bit offset, see what default instruction format to use.  */
  if (non_prefixed_format == NON_PREFIXED_DEFAULT)
    {
      unsigned size = GET_MODE_SIZE (mode);

      /* On 64-bit systems, assume 64-bit integers need to use DS form
	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
	 (for LXV and STXV).  TImode is problematical in that its normal usage
	 is expected to be GPRs where it wants a DS instruction format, but if
	 it goes into the vector registers, it wants a DQ instruction
	 format.  */
      if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
	non_prefixed_format = NON_PREFIXED_DS;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
	non_prefixed_format = NON_PREFIXED_DQ;

      else
	non_prefixed_format = NON_PREFIXED_D;
    }

  /* Classify the D/DS/DQ-form addresses.  */
  switch (non_prefixed_format)
    {
      /* Instruction format D, all 16 bits are valid.  */
    case NON_PREFIXED_D:
      return INSN_FORM_D;

      /* Instruction format DS, bottom 2 bits must be 0.  */
    case NON_PREFIXED_DS:
      if ((offset & 3) == 0)
	return INSN_FORM_DS;

      else if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

      /* Instruction format DQ, bottom 4 bits must be 0.  */
    case NON_PREFIXED_DQ:
      if ((offset & 15) == 0)
	return INSN_FORM_DQ;

      else if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

    default:
      break;
    }

  return INSN_FORM_BAD;
}
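/* Illustrative examples (not part of the original sources) of the
   classification above, assuming a power10 target with -mprefixed:

     (reg r3)                            -> INSN_FORM_BASE_REG
     (plus (reg r3) (reg r4))            -> INSN_FORM_X
     (plus (reg r3) (const_int 8))       -> INSN_FORM_D, _DS or _DQ by mode
     (plus (reg r3) (const_int 10))      -> INSN_FORM_PREFIXED_NUMERIC for a
                                            DS-form mode, since 10 & 3 != 0
     (plus (reg r3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC, since
                                            the offset needs more than 16 bits
     (symbol_ref "x")  [local, -mpcrel]  -> INSN_FORM_PCREL_LOCAL  */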
/* Helper function to see if we're potentially looking at lfs/stfs.
   - PARALLEL containing a SET and a CLOBBER
   - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
   - lfs:
     - SET is from UNSPEC_SF_FROM_SI to REG:SF
     - CLOBBER is a DI
 */

static bool
is_lfs_stfs_insn (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL)
    return false;

  /* This should be a parallel with exactly one set and one clobber.  */
  if (XVECLEN (pattern, 0) != 2)
    return false;

  rtx set = XVECEXP (pattern, 0, 0);
  if (GET_CODE (set) != SET)
    return false;

  rtx clobber = XVECEXP (pattern, 0, 1);
  if (GET_CODE (clobber) != CLOBBER)
    return false;

  /* All we care is that the destination of the SET is a mem:SI,
     the source should be an UNSPEC_SI_FROM_SF, and the clobber
     should be a scratch:V4SF.  */

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  rtx scratch = SET_DEST (clobber);

  if (GET_CODE (src) != UNSPEC)
    return false;

  /* stfs case.  */
  if (XINT (src, 1) == UNSPEC_SI_FROM_SF
      && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
    return true;

  /* lfs case.  */
  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
    return true;

  return false;
}
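/* For illustration (not part of the original sources), the stfs-style
   pattern matched above looks like

     (parallel [(set (mem:SI ...)
                     (unspec:SI [(reg:SF ...)] UNSPEC_SI_FROM_SF))
                (clobber (scratch:V4SF))])

   and the lfs-style pattern is the SF-register analogue with
   UNSPEC_SF_FROM_SI and a scratch:DI clobber.  */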
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
   instruction format (D/DS/DQ) used for offset memory.  */

static enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
  /* If it isn't a register, use the defaults.  */
  if (!REG_P (reg) && !SUBREG_P (reg))
    return NON_PREFIXED_DEFAULT;

  unsigned int r = reg_or_subregno (reg);

  /* If we have a pseudo, use the default instruction format.  */
  if (!HARD_REGISTER_NUM_P (r))
    return NON_PREFIXED_DEFAULT;

  unsigned size = GET_MODE_SIZE (mode);

  /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available for vectors.  */
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_D;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available.  */
  else if (ALTIVEC_REGNO_P (r))
    {
      if (!TARGET_P9_VECTOR)
	return NON_PREFIXED_X;

      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_DS;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
     otherwise.  Assume that any other register, such as LR, CRs, etc. will go
     through the GPR registers for memory operations.  */
  else if (TARGET_POWERPC64 && size >= 8)
    return NON_PREFIXED_DS;

  return NON_PREFIXED_D;
}
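/* Illustrative summary (not part of the original sources) of the mapping
   above on a power10 target with VSX: FPRs get D form for SFmode/8-byte
   scalars and DQ form for 16-byte vectors; Altivec registers get DS and DQ
   forms respectively (X form for everything before power9); GPRs get DS
   form for 8-byte items on 64-bit targets and D form otherwise.  */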
/* Whether a load instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_load_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal load insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx reg = recog_data.operand[0];
  rtx mem = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed load instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  /* LWA uses the DS format instead of the D format that LWZ uses.  */
  enum non_prefixed_form non_prefixed;
  machine_mode reg_mode = GET_MODE (reg);
  machine_mode mem_mode = GET_MODE (mem);

  if (mem_mode == SImode && reg_mode == DImode
      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
    non_prefixed = NON_PREFIXED_DS;

  else
    non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}
/* Whether a store instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_store_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal store insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx mem = recog_data.operand[0];
  rtx reg = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed store instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  machine_mode mem_mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  /* Need to make sure we aren't looking at a stfs which doesn't look
     like the other things reg_to_non_prefixed/address_is_prefixed
     looks for.  */
  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (addr, mem_mode, non_prefixed);
}
/* Whether a load immediate or add instruction is a prefixed instruction.  This
   is called from the prefixed attribute processing.  */

bool
prefixed_paddi_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);

  if (!REG_P (dest) && !SUBREG_P (dest))
    return false;

  /* Is this a load immediate that can't be done with a simple ADDI or
     ADDIS?  */
  if (CONST_INT_P (src))
    return (satisfies_constraint_eI (src)
	    && !satisfies_constraint_I (src)
	    && !satisfies_constraint_L (src));

  /* Is this a PADDI instruction that can't be done with a simple ADDI or
     ADDIS?  */
  if (GET_CODE (src) == PLUS)
    {
      rtx op1 = XEXP (src, 1);

      return (CONST_INT_P (op1)
	      && satisfies_constraint_eI (op1)
	      && !satisfies_constraint_I (op1)
	      && !satisfies_constraint_L (op1));
    }

  /* If not, is it a load of a PC-relative address?  */
  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
    return false;

  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
    return false;

  enum insn_form iform = address_to_insn_form (src, Pmode,
					       NON_PREFIXED_DEFAULT);

  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}
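/* Example (illustrative, not part of the original sources): "li 3,100"
   satisfies constraint I and "lis 3,0x1234" satisfies constraint L, so
   neither is prefixed; loading 0x12345678 satisfies only the 34-bit eI
   constraint and must use "pli 3,305419896", which the function above
   reports as a prefixed insn.  */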
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  */
static bool next_insn_prefixed_p;

/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
   outputting the assembler code.  On the PowerPC, we remember if the current
   insn is a prefixed insn where we need to emit a 'p' before the insn.

   In addition, if the insn is part of a PC-relative reference to an external
   label optimization, this is recorded also.  */

void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
  next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
  return;
}

/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns that is set in
   FINAL_PRESCAN_INSN.  */

void
rs6000_asm_output_opcode (FILE *stream)
{
  if (next_insn_prefixed_p)
    fprintf (stream, "p");

  return;
}
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used when
   there is some systematic length adjustment required that would be difficult
   to express in the length attribute.

   In the PowerPC, we use this to adjust the length of an instruction if one or
   more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
   hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for the
   difference.  */

int
rs6000_adjust_insn_length (rtx_insn *insn, int length)
{
  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
    {
      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
	  && get_attr_prefixed (insn) == PREFIXED_YES)
	{
	  int num_prefixed = get_attr_max_prefixed_insns (insn);
	  length += 4 * (num_prefixed + 1);
	}
    }

  return length;
}
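/* Worked example (illustrative, not part of the original sources): an insn
   whose non-prefixed length is 4 and whose max_prefixed_insns attribute is 1
   gets length 4 + 4 * (1 + 1) = 12, i.e. the 8-byte prefixed encoding plus
   4 bytes of potential alignment padding so the prefixed insn does not
   cross a 64-byte boundary.  */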
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Fills in the label name that should be used for a 476 link stack thunk.  */

void
get_ppc476_thunk_name (char name[32])
{
  gcc_assert (TARGET_LINK_STACK);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__ppc476.get_thunk");
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
}

/* This function emits the simple thunk routine that is used to preserve
   the link stack on the 476 cpu.  */

static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_code_end (void)
{
  char name[32];
  tree decl;

  if (!TARGET_LINK_STACK)
    return;

  get_ppc476_thunk_name (name);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;

#if RS6000_WEAK
  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));
      DECL_WEAK (decl) = 1;
      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
      targetm.asm_out.globalize_label (asm_out_file, name);
      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    }
  else
#endif
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, name);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  fputs ("\tblr\n", asm_out_file);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
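/* Illustrative note (not part of the original sources): the emitted thunk
   body is a single "blr".  A caller issues "bl __ppc476.get_thunk" followed
   by "mflr rN" to read its own address; because the bl/blr pair is matched,
   the 476 link stack used for return-address prediction stays balanced,
   unlike the unpaired "bcl 20,31,$+4" idiom.  */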
/* Add r30 to hard reg set if the prologue sets it up and it is not
   pic_offset_table_rtx.  */

static void
rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
{
  if (!TARGET_SINGLE_PIC_BASE
      && TARGET_TOC
      && TARGET_MINIMAL_TOC
      && !constant_pool_empty_p ())
    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
  if (cfun->machine->split_stack_argp_used)
    add_to_hard_reg_set (&set->set, Pmode, 12);

  /* Make sure the hard reg set doesn't include r2, which was possibly added
     via PIC_OFFSET_TABLE_REGNUM.  */
  if (TARGET_TOC)
    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
}
/* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation to single GPR registers.

   DEST is the destination register.
   OP1 and OP2 are the input source registers.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_inner (rtx dest,
			    rtx op1,
			    rtx op2,
			    enum rtx_code code,
			    machine_mode mode,
			    bool complement_final_p,
			    bool complement_op1_p,
			    bool complement_op2_p)
{
  rtx bool_rtx;

  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
  if (op2 && CONST_INT_P (op2)
      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
      HOST_WIDE_INT value = INTVAL (op2) & mask;

      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
      if (code == AND)
	{
	  if (value == 0)
	    {
	      emit_insn (gen_rtx_SET (dest, const0_rtx));
	      return;
	    }

	  else if (value == mask)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}

      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
      else if (code == IOR || code == XOR)
	{
	  if (value == 0)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}
    }

  if (code == AND && mode == SImode
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      emit_insn (gen_andsi3 (dest, op1, op2));
      return;
    }

  if (complement_op1_p)
    op1 = gen_rtx_NOT (mode, op1);

  if (complement_op2_p)
    op2 = gen_rtx_NOT (mode, op2);

  /* For canonical RTL, if only one arm is inverted it is the first.  */
  if (!complement_op1_p && complement_op2_p)
    std::swap (op1, op2);

  bool_rtx = ((code == NOT)
	      ? gen_rtx_NOT (mode, op1)
	      : gen_rtx_fmt_ee (code, mode, op1, op2));

  if (complement_final_p)
    bool_rtx = gen_rtx_NOT (mode, bool_rtx);

  emit_insn (gen_rtx_SET (dest, bool_rtx));
}
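/* Illustration (not part of the original sources): with complement_op2_p
   set, the operands are swapped so the canonical (and (not x) y) form is
   built, matching the "andc" instruction; with complement_final_p set, an
   IOR is wrapped as (not (ior x y)), matching "nor".  */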
/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
   operations are split immediately during RTL generation to allow for more
   optimizations of the AND/IOR/XOR.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
   CLOBBER_REG is either NULL or a scratch register of type CC to allow
   formation of the AND instructions.  */

static void
rs6000_split_logical_di (rtx operands[3],
			 enum rtx_code code,
			 bool complement_final_p,
			 bool complement_op1_p,
			 bool complement_op2_p)
{
  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
  enum hi_lo { hi = 0, lo = 1 };
  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
  size_t i;

  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);

  if (code == NOT)
    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
  else
    {
      if (!CONST_INT_P (operands[2]))
	{
	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
	}
      else
	{
	  HOST_WIDE_INT value = INTVAL (operands[2]);
	  HOST_WIDE_INT value_hi_lo[2];

	  gcc_assert (!complement_final_p);
	  gcc_assert (!complement_op1_p);
	  gcc_assert (!complement_op2_p);

	  value_hi_lo[hi] = value >> 32;
	  value_hi_lo[lo] = value & lower_32bits;

	  for (i = 0; i < 2; i++)
	    {
	      HOST_WIDE_INT sub_value = value_hi_lo[i];

	      if (sub_value & sign_bit)
		sub_value |= upper_32bits;

	      op2_hi_lo[i] = GEN_INT (sub_value);

	      /* If this is an AND instruction, check to see if we need to load
		 the value in a register.  */
	      if (code == AND && sub_value != -1 && sub_value != 0
		  && !and_operand (op2_hi_lo[i], SImode))
		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
	    }
	}
    }

  for (i = 0; i < 2; i++)
    {
      /* Split large IOR/XOR operations.  */
      if ((code == IOR || code == XOR)
	  && CONST_INT_P (op2_hi_lo[i])
	  && !complement_final_p
	  && !complement_op1_p
	  && !complement_op2_p
	  && !logical_const_operand (op2_hi_lo[i], SImode))
	{
	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
	  rtx tmp = gen_reg_rtx (SImode);

	  /* Make sure the constant is sign extended.  */
	  if ((hi_16bits & sign_bit) != 0)
	    hi_16bits |= upper_32bits;

	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
				      code, SImode, false, false, false);

	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
				      code, SImode, false, false, false);
	}
      else
	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
				    code, SImode, complement_final_p,
				    complement_op1_p, complement_op2_p);
    }

  return;
}
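/* Worked example (illustrative, not part of the original sources): on a
   32-bit target, the low word of "x |= 0x12345678" is not a single
   logical_const_operand, so the loop above emits two halves:

     tmp = op1 | 0x12340000;   (ORIS)
     dst = tmp | 0x00005678;   (ORI)
*/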
/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs either are
   exactly the same as the output registers, or there is no overlap.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

void
rs6000_split_logical (rtx operands[3],
		      enum rtx_code code,
		      bool complement_final_p,
		      bool complement_op1_p,
		      bool complement_op2_p)
{
  machine_mode mode = GET_MODE (operands[0]);
  machine_mode sub_mode;
  rtx op0, op1, op2;
  int sub_size, regno0, regno1, nregs, i;

  /* If this is DImode, use the specialized version that can run before
     register allocation.  */
  if (mode == DImode && !TARGET_POWERPC64)
    {
      rs6000_split_logical_di (operands, code, complement_final_p,
			       complement_op1_p, complement_op2_p);
      return;
    }

  op0 = operands[0];
  op1 = operands[1];
  op2 = (code == NOT) ? NULL_RTX : operands[2];
  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
  sub_size = GET_MODE_SIZE (sub_mode);
  regno0 = REGNO (op0);
  regno1 = REGNO (op1);

  gcc_assert (reload_completed);
  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));

  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
  gcc_assert (nregs > 1);

  if (op2 && REG_P (op2))
    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));

  for (i = 0; i < nregs; i++)
    {
      int offset = i * sub_size;
      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
      rtx sub_op2 = ((code == NOT)
		     ? NULL_RTX
		     : simplify_subreg (sub_mode, op2, mode, offset));

      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
				  complement_final_p, complement_op1_p,
				  complement_op2_p);
    }

  return;
}
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together on
   a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
		   rtx addis_value,	/* addis value.  */
		   rtx target,		/* target register that is loaded.  */
		   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      if (!peep2_reg_dead_p (2, addis_reg))
	return false;

      /* If the target register being loaded is the stack pointer, we must
	 avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
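/* Example (illustrative, not part of the original sources) of an addis/load
   pair the peephole accepts, where r10 is dead after the load:

     addis 10,2,.LC0@toc@ha   # addis_reg = r10, addis_value = high part
     lwz 9,.LC0@toc@l(10)     # target = r9, mem uses r10 as base

   Power8 can fuse the pair once the sequence is rewritten so the load's
   base register is the target register itself.  */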
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}
/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);

  return;
}
/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}
/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  machine_mode mode;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}
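/* Illustration (not part of the original sources): for a TOC-relative SImode
   load, the two helpers above are called with the same register for the
   addis destination and the load's base, so the printed sequence is

     addis 9,2,sym@toc@ha
     lwz 9,sym@toc@l(9)

   which is the shape the power8 fusion hardware recognizes.  */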
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
			       build4 (TARGET_EXPR, double_type_node, fenv_var,
				       void_node, NULL_TREE, NULL_TREE));

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
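/* Illustrative note (not part of the original sources): mffs reads the FPSCR
   image as a double, so the masks above operate on its 64-bit bit pattern.
   For feholdexcept, ANDing with 0xffffffff00000007 keeps the rounding mode
   bits and the non-IEEE mode bit of the low word while zeroing the exception
   status and enable bits; feupdateenv then ORs the saved control bits
   (selected by 0x1ff80fff) over the current environment (masked with
   0xffffffff1fffff00) before writing it back with mtfsf 0xff.  */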
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
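/* Illustration (not part of the original sources): with src1 = {a0, a1} and
   src2 = {b0, b1}, the net effect of the sequence above is
   dst = {(float) a0, (float) a1, (float) b0, (float) b1}.  The xxpermdi
   pairing is swapped between endiannesses precisely so that the even-word
   merge done by vmrgew produces this element order either way.  */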
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif
/* On 64-bit Linux and Freebsd systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of using sinf128, which pollutes the user's
   namespace.

   This will switch the names for Fortran math functions as well (which doesn't
   use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this if the default is that long double is IBM extended double, and
   the user asked for IEEE 128-bit.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl))
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);

      if (name[len - 1] == 'l')
	{
	  bool uses_ieee128_p = false;
	  tree type = TREE_TYPE (decl);
	  machine_mode ret_mode = TYPE_MODE (type);

	  /* See if the function returns an IEEE 128-bit floating point type or
	     complex type.  */
	  if (ret_mode == TFmode || ret_mode == TCmode)
	    uses_ieee128_p = true;
	  else
	    {
	      function_args_iterator args_iter;
	      tree arg;

	      /* See if the function passes an IEEE 128-bit floating point type
		 or complex type.  */
	      FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		{
		  machine_mode arg_mode = TYPE_MODE (arg);
		  if (arg_mode == TFmode || arg_mode == TCmode)
		    {
		      uses_ieee128_p = true;
		      break;
		    }
		}
	    }

	  /* If we passed or returned an IEEE 128-bit floating point type,
	     change the name.  */
	  if (uses_ieee128_p)
	    {
	      char *name2 = (char *) alloca (len + 4);
	      memcpy (name2, name, len - 1);
	      strcpy (name2 + len - 1, "f128");
	      id = get_identifier (name2);
	    }
	}
    }

  return id;
}
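/* Example (illustrative, not part of the original sources): when the default
   long double is IBM extended double but -mabi=ieeelongdouble is given,
   __builtin_sinl's name "sinl" ends in 'l' and its argument is TFmode, so
   the hook above renames it to "sinf128"; "sin" and "sinf" are untouched
   because they do not pass or return an IEEE 128-bit type.  */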
/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}
/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
     type addresses, so don't allow MEMs with those address types to be
     substituted as an equivalent expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}
/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }
  else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
    {
      /* We really care about the modes of the base types.  */
      frommode = TYPE_MODE (TREE_TYPE (fromtype));
      tomode = TYPE_MODE (TREE_TYPE (totype));

      /* Do not allow conversions to/from XOmode and OOmode pointer
	 types, except to/from void pointers.  */
      if (frommode != tomode
	  && frommode != VOIDmode
	  && tomode != VOIDmode)
	{
	  if (frommode == XOmode)
	    return N_("invalid conversion from type %<* __vector_quad%>");
	  if (tomode == XOmode)
	    return N_("invalid conversion to type %<* __vector_quad%>");
	  if (frommode == OOmode)
	    return N_("invalid conversion from type %<* __vector_pair%>");
	  if (tomode == OOmode)
	    return N_("invalid conversion to type %<* __vector_pair%>");
	}
    }

  /* Conversion allowed.  */
  return NULL;
}
long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}

void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}
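/* Usage sketch (illustrative, not part of the original sources): splatting
   1.0f works because its bit pattern 0x3f800000 has a non-zero exponent
   field, while a single-precision subnormal such as 0x00000001 has a zero
   exponent and a non-zero mantissa, which trips the diagnostic above since
   XXSPLTIDP cannot represent the value once widened to double.  */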
/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"