From 3e624fa4b870f90c8f5c31ad533b3abc4a4bfa93 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 9 Nov 2023 12:54:23 +0100 Subject: [PATCH] x86: Cpu64 handling improvements First of all we want to also accumulate its reverse dependencies, such that we can use them in cpu_flags_match(). This is in particular in preparation of APX additions, such that e.g. BMI VEX-encoding templates can become combined VEX/EVEX ones. Once we have the reverse dependencies, we can further leverage them to omit explicit "&x64" from any insn templates dealing with 64-bit-mode-only ISA extensions. Besides helping readability for several insn templates we already have, this will also help with what is going to be added for APX (as all of the new templates would otherwise need to have "&x64"). Note that rather than leaving a meaningless CPU_64_FLAGS (which is unused anyway), its emitting is now also suppressed. --- gas/config/tc-i386.c | 18 ++++++++--- opcodes/i386-gen.c | 46 ++++++++++++++++++++++++--- opcodes/i386-init.h | 20 ++++++------ opcodes/i386-opc.tbl | 76 +++++++++++++++++++++++--------------------- 4 files changed, 103 insertions(+), 57 deletions(-) diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index c7b9a95e50b..c6a15216d3b 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -804,6 +804,9 @@ static char *cpu_sub_arch_name = NULL; /* CPU feature flags. */ i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS; +/* ISA extensions available in 64-bit mode only. */ +static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS; + /* If we have selected a cpu we are generating instructions for. */ static int cpu_arch_tune_set = 0; @@ -1874,7 +1877,12 @@ cpu_flags_match (const insn_template *t) else { /* This instruction is available only on some archs. 
*/ - i386_cpu_flags cpu = cpu_arch_flags; + i386_cpu_flags active, cpu; + + if (flag_code != CODE_64BIT) + active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags); + else + active = cpu_arch_flags; /* Dual VEX/EVEX templates may need stripping of one of the flags. */ if (t->opcode_modifier.vex && t->opcode_modifier.evex) @@ -1895,14 +1903,14 @@ cpu_flags_match (const insn_template *t) { x.bitfield.cpuavx512f = 0; x.bitfield.cpuavx512vl = 0; - if (x.bitfield.cpufma && !cpu.bitfield.cpufma) + if (x.bitfield.cpufma && !active.bitfield.cpufma) x.bitfield.cpuavx = 0; } } } /* AVX512VL is no standalone feature - match it and then strip it. */ - if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl) + if (x.bitfield.cpuavx512vl && !active.bitfield.cpuavx512vl) return match; x.bitfield.cpuavx512vl = 0; @@ -1912,7 +1920,7 @@ cpu_flags_match (const insn_template *t) if (x.bitfield.cpuavx && x.bitfield.cpuavx2) x.bitfield.cpuavx2 = 0; - cpu = cpu_flags_and (x, cpu); + cpu = cpu_flags_and (x, active); if (!cpu_flags_all_zero (&cpu)) { if (t->cpu.bitfield.cpuavx && t->cpu.bitfield.cpuavx512f) @@ -1921,7 +1929,7 @@ cpu_flags_match (const insn_template *t) ? 
cpu.bitfield.cpuavx512f : cpu.bitfield.cpuavx) && (!x.bitfield.cpufma || cpu.bitfield.cpufma - || cpu_arch_flags.bitfield.cpuavx512f) + || active.bitfield.cpuavx512f) && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni) && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes) && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq)) diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c index b2ddda3755b..c118f01c492 100644 --- a/opcodes/i386-gen.c +++ b/opcodes/i386-gen.c @@ -166,6 +166,10 @@ static const dependency isa_dependencies[] = "AVX2" }, { "AVX_NE_CONVERT", "AVX2" }, + { "CX16", + "64" }, + { "LKGS", + "64" }, { "FRED", "LKGS" }, { "AVX512F", @@ -240,13 +244,13 @@ static const dependency isa_dependencies[] = { "SNP", "SEV_ES" }, { "RMPQUERY", - "SNP" }, + "SNP|64" }, { "TSX", "RTM|HLE" }, { "TSXLDTRK", "RTM" }, { "AMX_TILE", - "XSAVE" }, + "XSAVE|64" }, { "AMX_INT8", "AMX_TILE" }, { "AMX_BF16", @@ -259,6 +263,18 @@ static const dependency isa_dependencies[] = "SSE2" }, { "WIDEKL", "KL" }, + { "PBNDKB", + "64" }, + { "UINTR", + "64" }, + { "PREFETCHI", + "64" }, + { "CMPCCXADD", + "64" }, + { "MSRLIST", + "64" }, + { "USER_MSR", + "64" }, }; /* This array is populated as process_i386_initializers() walks cpu_flags[]. */ @@ -772,8 +788,10 @@ add_isa_dependencies (bitfield *flags, const char *f, int value, } free (deps); - /* ISA extensions with dependencies need CPU_ANY_*_FLAGS emitted. */ - if (reverse < ARRAY_SIZE (isa_reverse_deps[0])) + /* ISA extensions with dependencies need CPU_ANY_*_FLAGS emitted, + unless the sole dependency is the "64-bit mode only" one. */ + if (reverse < ARRAY_SIZE (isa_reverse_deps[0]) + && strcmp (isa_dependencies[i].deps, "64")) isa_reverse_deps[reverse][reverse] = 1; is_avx = orig_is_avx; @@ -919,6 +937,15 @@ process_i386_cpu_flag (FILE *table, char *flag, size_t len = strlen (name); char *upper = xmalloc (len + 1); + /* Cpu64 is special: It specifies a mode dependency, not an ISA one. 
Zap + the flag from ISA initializer macros (and from CPU_ANY_64_FLAGS + itself, where we only care about tracking its dependents). Also don't emit the + (otherwise all zero) CPU_64_FLAGS. */ + if (flag != NULL && reverse == Cpu64) + return; + if (is_isa || flag == NULL) + flags[Cpu64].value = 0; + for (i = 0; i < len; ++i) { /* Don't emit #define-s for auxiliary entries. */ @@ -931,6 +958,14 @@ process_i386_cpu_flag (FILE *table, char *flag, flag != NULL ? "": "ANY_", upper); free (upper); } + else + { + /* Synthesize "64-bit mode only" dependencies from the dependencies we + have accumulated. */ + for (i = 0; i < ARRAY_SIZE (isa_reverse_deps[0]); ++i) + if (flags[i].value && isa_reverse_deps[Cpu64][i]) + flags[Cpu64].value = 1; + } output_cpu_flags (table, flags, ARRAY_SIZE (flags), name != NULL, comma, indent, lineno); @@ -2142,6 +2177,8 @@ main (int argc, char **argv) qsort (operand_types, ARRAY_SIZE (operand_types), sizeof (operand_types [0]), compare); + process_i386_initializers (); + table = fopen ("i386-tbl.h", "w"); if (table == NULL) fail ("can't create i386-tbl.h, errno = %s\n", @@ -2151,7 +2188,6 @@ main (int argc, char **argv) process_i386_opcodes (table); process_i386_registers (table); - process_i386_initializers (); fclose (table); diff --git a/opcodes/i386-init.h b/opcodes/i386-init.h index 475db31e099..5aecf77f841 100644 --- a/opcodes/i386-init.h +++ b/opcodes/i386-init.h @@ -1398,16 +1398,6 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, \ 0, 0, 0, 0, 0 } } -#define CPU_64_FLAGS \ - { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, \ - 0, 0, 0, 0, 0 
} } - #define CPU_AVX_FLAGS \ { { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, \ 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, \ @@ -2518,6 +2508,16 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, \ 0, 0, 0, 0, 0 } } +#define CPU_ANY_64_FLAGS \ + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, \ + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0 } } + #define CPU_ANY_AVX_FLAGS \ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, \ diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index a3426298340..d9540e3c3ae 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -158,6 +158,8 @@ #define i287 287 #define i387 387 #define i687 687 +// Note: Don't add this one to any templates already specifying a 64-bit-mode- +// only ISA extension: i386-gen takes care of adding such dependencies. #define x64 64 ### MARKER ### @@ -1273,7 +1275,7 @@ fisttpll, 0xdd/1, FISTTP, Modrm|NoSuf|ATTSyntax, { Unspecified|BaseIndex } // CMPXCHG16B instruction. -cmpxchg16b, 0xfc7/1, CX16|x64, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecified|BaseIndex } +cmpxchg16b, 0xfc7/1, CX16, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecified|BaseIndex } // MONITOR instructions. @@ -3013,7 +3015,7 @@ pconfig, 0x0f01c5, PCONFIG, NoSuf, {} // PBNDKB instruction. -pbndkb, 0x0f01c7, PBNDKB|x64, NoSuf, {} +pbndkb, 0x0f01c7, PBNDKB, NoSuf, {} // PBNDKB instruction end. 
@@ -3100,8 +3102,8 @@ rmpadjust, 0xf30f01fe, SNP|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword } // RMPQUERY instruction -rmpquery, 0xf30f01fd, RMPQUERY|x64, NoSuf, {} -rmpquery, 0xf30f01fd, RMPQUERY|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword } +rmpquery, 0xf30f01fd, RMPQUERY, NoSuf, {} +rmpquery, 0xf30f01fd, RMPQUERY, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword } // RMPQUERY instruction end @@ -3126,26 +3128,26 @@ xresldtrk, 0xf20f01e9, TSXLDTRK, NoSuf, {} // AMX instructions. -ldtilecfg, 0x49/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex } -sttilecfg, 0x6649/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex } +ldtilecfg, 0x49/0, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex } +sttilecfg, 0x6649/0, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex } -tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tdpbf16ps, 0xf35c, AMX_BF16|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tdpfp16ps, 0xf25c, AMX_FP16|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tdpbssd, 0xf25e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tdpbuud, 0x5e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tdpbusd, 0x665e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tdpbsud, 0xf35e, AMX_INT8|x64, 
Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } +tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM } -tileloadd, 0xf24b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM } -tileloaddt1, 0x664b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM } -tilestored, 0xf34b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex } +tileloadd, 0xf24b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM } +tileloaddt1, 0x664b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM } +tilestored, 0xf34b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex } -tilerelease, 0x49c0, AMX_TILE|x64, Vex128|Space0F38|VexW0|NoSuf, {} +tilerelease, 0x49c0, AMX_TILE, Vex128|Space0F38|VexW0|NoSuf, {} -tilezero, 0xf249, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM } +tilezero, 0xf249, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM } // AMX instructions end. @@ -3176,11 +3178,11 @@ seamcall, 0x660f01cf, TDX|x64, NoSuf, {} // UINTR instructions. 
-uiret, 0xf30f01ec, UINTR|x64, NoSuf, {} -clui, 0xf30f01ee, UINTR|x64, NoSuf, {} -stui, 0xf30f01ef, UINTR|x64, NoSuf, {} -testui, 0xf30f01ed, UINTR|x64, NoSuf, {} -senduipi, 0xf30fc7/6, UINTR|x64, Modrm|NoSuf|NoRex64, { Reg64 } +uiret, 0xf30f01ec, UINTR, NoSuf, {} +clui, 0xf30f01ee, UINTR, NoSuf, {} +stui, 0xf30f01ef, UINTR, NoSuf, {} +testui, 0xf30f01ed, UINTR, NoSuf, {} +senduipi, 0xf30fc7/6, UINTR, Modrm|NoSuf|NoRex64, { Reg64 } // UINTR instructions end. @@ -3302,14 +3304,14 @@ vrsqrtsh, 0x664f, AVX512_FP16, Modrm|EVexLIG|Masking|EVexMap6|VexVVVV|VexW0|Disp // PREFETCHI instructions. -prefetchit0, 0xf18/7, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex } -prefetchit1, 0xf18/6, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex } +prefetchit0, 0xf18/7, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex } +prefetchit1, 0xf18/6, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex } // PREFETCHI instructions end. // CMPCCXADD instructions. -cmpxadd, 0x66e, CMPCCXADD|x64, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex } +cmpxadd, 0x66e, CMPCCXADD, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex } // CMPCCXADD instructions end. @@ -3321,8 +3323,8 @@ wrmsrns, 0x0f01c6, WRMSRNS, NoSuf, {} // MSRLIST instructions. -rdmsrlist, 0xf20f01c6, MSRLIST|x64, NoSuf, {} -wrmsrlist, 0xf30f01c6, MSRLIST|x64, NoSuf, {} +rdmsrlist, 0xf20f01c6, MSRLIST, NoSuf, {} +wrmsrlist, 0xf30f01c6, MSRLIST, NoSuf, {} // MSRLIST instructions end. @@ -3337,23 +3339,23 @@ axor, 0xf30f38fc, RAO_INT, Modrm|IgnoreSize|CheckOperandSize|NoSuf, { Reg32|Reg6 // LKGS instruction. 
-lkgs, 0xf20f00/6, LKGS|x64, Modrm|IgnoreSize|No_bSuf|No_sSuf|NoRex64, { Reg16|Reg32|Reg64 } -lkgs, 0xf20f00/6, LKGS|x64, Modrm|IgnoreSize|No_bSuf|No_lSuf|No_sSuf|No_qSuf, { Word|Unspecified|BaseIndex } +lkgs, 0xf20f00/6, LKGS, Modrm|IgnoreSize|No_bSuf|No_sSuf|NoRex64, { Reg16|Reg32|Reg64 } +lkgs, 0xf20f00/6, LKGS, Modrm|IgnoreSize|No_bSuf|No_lSuf|No_sSuf|No_qSuf, { Word|Unspecified|BaseIndex } // LKGS instruction end. // FRED instructions. -erets, 0xf20f01ca, FRED|x64, NoSuf, {} -eretu, 0xf30f01ca, FRED|x64, NoSuf, {} +erets, 0xf20f01ca, FRED, NoSuf, {} +eretu, 0xf30f01ca, FRED, NoSuf, {} // FRED instructions end. // USER_MSR instructions. -urdmsr, 0xf20f38f8, USER_MSR|x64, RegMem|NoSuf|NoRex64, { Reg64, Reg64 } -urdmsr, 0xf2f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 } -uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|NoSuf|NoRex64, { Reg64, Reg64 } -uwrmsr, 0xf3f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 } +urdmsr, 0xf20f38f8, USER_MSR, RegMem|NoSuf|NoRex64, { Reg64, Reg64 } +urdmsr, 0xf2f8/0, USER_MSR, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 } +uwrmsr, 0xf30f38f8, USER_MSR, Modrm|NoSuf|NoRex64, { Reg64, Reg64 } +uwrmsr, 0xf3f8/0, USER_MSR, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 } // USER_MSR instructions end. -- 2.30.2