From d4ce49b7ac077a9882d6a5e689e260300045ca88 Mon Sep 17 00:00:00 2001 From: Andrew Burgess Date: Tue, 21 Jun 2022 20:23:35 +0100 Subject: [PATCH] gdb: disassembler opcode display formatting This commit changes the format of 'disassemble /r' to match GNU objdump. Specifically, GDB will now display the instruction bytes in as 'objdump --wide --disassemble' does. Here is an example for RISC-V before this patch: (gdb) disassemble /r 0x0001018e,0x0001019e Dump of assembler code from 0x1018e to 0x1019e: 0x0001018e : 03 26 84 fe lw a2,-24(s0) 0x00010192 : 83 25 c4 fe lw a1,-20(s0) 0x00010196 : 61 65 lui a0,0x18 0x00010198 : 13 05 85 6a addi a0,a0,1704 0x0001019c : f1 22 jal 0x10368 End of assembler dump. And here's an example after this patch: (gdb) disassemble /r 0x0001018e,0x0001019e Dump of assembler code from 0x1018e to 0x1019e: 0x0001018e : fe842603 lw a2,-24(s0) 0x00010192 : fec42583 lw a1,-20(s0) 0x00010196 : 6561 lui a0,0x18 0x00010198 : 6a850513 addi a0,a0,1704 0x0001019c : 22f1 jal 0x10368 End of assembler dump. There are two differences here. First, the instruction bytes after the patch are grouped based on the size of the instruction, and are byte-swapped to little-endian order. Second, after the patch, GDB now uses the bytes-per-line hint from libopcodes to add whitespace padding after the opcode bytes, this means that in most cases the instructions are nicely aligned. It is still possible for a very long instruction to intrude into the disassembled text space. The next example is x86-64, before the patch: (gdb) disassemble /r main Dump of assembler code for function main: 0x0000000000401106 <+0>: 55 push %rbp 0x0000000000401107 <+1>: 48 89 e5 mov %rsp,%rbp 0x000000000040110a <+4>: c7 87 d8 00 00 00 01 00 00 00 movl $0x1,0xd8(%rdi) 0x0000000000401114 <+14>: b8 00 00 00 00 mov $0x0,%eax 0x0000000000401119 <+19>: 5d pop %rbp 0x000000000040111a <+20>: c3 ret End of assembler dump. And after the patch: (gdb) disassemble /r main Dump of assembler code for function main: 0x0000000000401106 <+0>: 55 push %rbp 0x0000000000401107 <+1>: 48 89 e5 mov %rsp,%rbp 0x000000000040110a <+4>: c7 87 d8 00 00 00 01 00 00 00 movl $0x1,0xd8(%rdi) 0x0000000000401114 <+14>: b8 00 00 00 00 mov $0x0,%eax 0x0000000000401119 <+19>: 5d pop %rbp 0x000000000040111a <+20>: c3 ret End of assembler dump. Most instructions are aligned, except for the very long instruction. Notice too that for x86-64 libopcodes doesn't request that GDB group the instruction bytes. This matches the behaviour of objdump. In case the user really wants the old behaviour, I have added a new modifier 'disassemble /b', this displays the instruction byte at a time. For x86-64, which never groups instruction bytes, /b and /r are equivalent, but for RISC-V, using /b gets the old layout back (except that the whitespace for alignment is still present). Consider our original RISC-V example, this time using /b: (gdb) disassemble /b 0x0001018e,0x0001019e Dump of assembler code from 0x1018e to 0x1019e: 0x0001018e : 03 26 84 fe lw a2,-24(s0) 0x00010192 : 83 25 c4 fe lw a1,-20(s0) 0x00010196 : 61 65 lui a0,0x18 0x00010198 : 13 05 85 6a addi a0,a0,1704 0x0001019c : f1 22 jal 0x10368 End of assembler dump. Obviously, this patch is a potentially significant change to the behaviour or /r. I could have added /b with the new behaviour and left /r alone. However, personally, I feel the new behaviour is significantly better than the old, hence, I made /r be what I consider the "better" behaviour. The reason I prefer the new behaviour is that, when I use /r, I almost always want to manually decode the instruction for some reason, and having the bytes displayed in "instruction order" rather than memory order, just makes this easier. The 'record instruction-history' command also takes a /r modifier, and has been modified in the same way as disassemble; /r gets the new behaviour, and /b has been added to retain the old behaviour. Finally, the MI command -data-disassemble, is unchanged in behaviour, this command now requests the raw bytes of the instruction, which is equivalent to the /b modifier. This means that the MI output will remain backward compatible. --- gdb/NEWS | 12 +++++++ gdb/cli/cli-cmds.c | 6 ++++ gdb/disasm-flags.h | 1 + gdb/disasm.c | 43 ++++++++++++++++++++-- gdb/doc/gdb.texinfo | 48 +++++++++++++++++++++---- gdb/mi/mi-cmd-disas.c | 6 ++-- gdb/record.c | 3 ++ gdb/testsuite/gdb.mi/mi-disassemble.exp | 6 ++-- 8 files changed, 109 insertions(+), 16 deletions(-) diff --git a/gdb/NEWS b/gdb/NEWS index 1457c99ff04..796a4ef8072 100644 --- a/gdb/NEWS +++ b/gdb/NEWS @@ -59,6 +59,18 @@ * gdb now supports zstd compressed debug sections (ELFCOMPRESS_ZSTD) for ELF. +* The format of 'disassemble /r' and 'record instruction-history /r' + has changed. The instruction bytes could now be grouped together, + and displayed in the endianness of the instruction. This is the + same layout as used by GNU objdump when disassembling. + + There is now 'disassemble /b' and 'record instruction-history /b' + which will always display the instructions bytes one at a time in + memory order, that is, the byte at the lowest address first. + + For both /r and /b GDB is now better at using whitespace in order to + align the disassembled instruction text. + * New commands maintenance set ignore-prologue-end-flag on|off diff --git a/gdb/cli/cli-cmds.c b/gdb/cli/cli-cmds.c index d5707192be0..c78b93f57b5 100644 --- a/gdb/cli/cli-cmds.c +++ b/gdb/cli/cli-cmds.c @@ -1508,6 +1508,9 @@ disassemble_current_function (gdb_disassembly_flags flags) A /r modifier will include raw instructions in hex with the assembly. + A /b modifier is similar to /r except the instruction bytes are printed + as separate bytes with no grouping, or endian switching. + A /s modifier will include source code with the assembly, like /m, with two important differences: 1) The output is still in pc address order. @@ -1546,6 +1549,9 @@ disassemble_command (const char *arg, int from_tty) case 'r': flags |= DISASSEMBLY_RAW_INSN; break; + case 'b': + flags |= DISASSEMBLY_RAW_BYTES; + break; case 's': flags |= DISASSEMBLY_SOURCE; break; diff --git a/gdb/disasm-flags.h b/gdb/disasm-flags.h index 025b6893941..5a7371b0a39 100644 --- a/gdb/disasm-flags.h +++ b/gdb/disasm-flags.h @@ -33,6 +33,7 @@ enum gdb_disassembly_flag DISASSEMBLY_OMIT_PC = (0x1 << 4), DISASSEMBLY_SOURCE = (0x1 << 5), DISASSEMBLY_SPECULATIVE = (0x1 << 6), + DISASSEMBLY_RAW_BYTES = (0x1 << 7), }; DEF_ENUM_FLAGS_TYPE (enum gdb_disassembly_flag, gdb_disassembly_flags); diff --git a/gdb/disasm.c b/gdb/disasm.c index ba6ac2d4827..b5e503fd71d 100644 --- a/gdb/disasm.c +++ b/gdb/disasm.c @@ -457,7 +457,7 @@ gdb_pretty_print_disassembler::pretty_print_insn (const struct disasm_insn *insn throw ex; } - if (flags & DISASSEMBLY_RAW_INSN) + if ((flags & (DISASSEMBLY_RAW_INSN | DISASSEMBLY_RAW_BYTES)) != 0) { /* Build the opcodes using a temporary stream so we can write them out in a single go for the MI. */ @@ -467,14 +467,51 @@ gdb_pretty_print_disassembler::pretty_print_insn (const struct disasm_insn *insn m_opcode_data.resize (size); read_code (pc, m_opcode_data.data (), size); - for (int i = 0; i < size; ++i) + /* The disassembler provides information about the best way to + display the instruction bytes to the user. We provide some sane + defaults in case the disassembler gets it wrong. */ + const struct disassemble_info *di = m_di.disasm_info (); + int bytes_per_line = std::max (di->bytes_per_line, size); + int bytes_per_chunk = std::max (di->bytes_per_chunk, 1); + + /* If the user has requested the instruction bytes be displayed + byte at a time, then handle that here. Also, if the instruction + is not a multiple of the chunk size (which probably indicates a + disassembler problem) then avoid that causing display problems + by switching to byte at a time mode. */ + if ((flags & DISASSEMBLY_RAW_BYTES) != 0 + || (size % bytes_per_chunk) != 0) + bytes_per_chunk = 1; + + /* Print the instruction opcodes bytes, grouped into chunks. */ + for (int i = 0; i < size; i += bytes_per_chunk) { if (i > 0) m_opcode_stb.puts (" "); - m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i]); + + if (di->display_endian == BFD_ENDIAN_LITTLE) + { + for (int k = bytes_per_chunk; k-- != 0; ) + m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i + k]); + } + else + { + for (int k = 0; k < bytes_per_chunk; k++) + m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i + k]); + } + } + + /* Calculate required padding. */ + int nspaces = 0; + for (int i = size; i < bytes_per_line; i += bytes_per_chunk) + { + if (i > size) + nspaces++; + nspaces += bytes_per_chunk * 2; } m_uiout->field_stream ("opcodes", m_opcode_stb); + m_uiout->spaces (nspaces); m_uiout->text ("\t"); } diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo index 238a49b027d..596e5873558 100644 --- a/gdb/doc/gdb.texinfo +++ b/gdb/doc/gdb.texinfo @@ -7945,7 +7945,10 @@ are printed in execution order. It can also print mixed source+disassembly if you specify the the @code{/m} or @code{/s} modifier, and print the raw instructions in hex -as well as in symbolic form by specifying the @code{/r} modifier. +as well as in symbolic form by specifying the @code{/r} or @code{/b} +modifier. The behaviour of the @code{/m}, @code{/s}, @code{/r}, and +@code{/b} modifiers are the same as for the @kbd{disassemble} command +(@pxref{disassemble,,@kbd{disassemble}}). The current position marker is printed for the instruction at the current program counter value. This instruction can appear multiple @@ -9859,6 +9862,7 @@ After @code{info line}, using @code{info line} again without specifying a location will display information about the next source line. +@anchor{disassemble} @table @code @kindex disassemble @cindex assembly instructions @@ -9869,16 +9873,17 @@ line. @itemx disassemble /m @itemx disassemble /s @itemx disassemble /r +@itemx disassemble /b This specialized command dumps a range of memory as machine instructions. It can also print mixed source+disassembly by specifying -the @code{/m} or @code{/s} modifier and print the raw instructions in hex -as well as in symbolic form by specifying the @code{/r} modifier. -The default memory range is the function surrounding the +the @code{/m} or @code{/s} modifier and print the raw instructions in +hex as well as in symbolic form by specifying the @code{/r} or @code{/b} +modifier. The default memory range is the function surrounding the program counter of the selected frame. A single argument to this command is a program counter value; @value{GDBN} dumps the function -surrounding this value. When two arguments are given, they should -be separated by a comma, possibly surrounded by whitespace. The -arguments specify a range of addresses to dump, in one of two forms: +surrounding this value. When two arguments are given, they should be +separated by a comma, possibly surrounded by whitespace. The arguments +specify a range of addresses to dump, in one of two forms: @table @code @item @var{start},@var{end} @@ -9916,6 +9921,35 @@ Dump of assembler code from 0x32c4 to 0x32e4: End of assembler dump. @end smallexample +The following two examples are for RISC-V, and demonstrates the +difference between the @code{/r} and @code{/b} modifiers. First with +@code{/b}, the bytes of the instruction are printed, in hex, in memory +order: + +@smallexample +(@value{GDBP}) disassemble /b 0x00010150,0x0001015c +Dump of assembler code from 0x10150 to 0x1015c: + 0x00010150 : 22 dc sw s0,56(sp) + 0x00010152 : 80 00 addi s0,sp,64 + 0x00010154 : 23 26 a4 fe sw a0,-20(s0) + 0x00010158 : 23 24 b4 fe sw a1,-24(s0) +End of assembler dump. +@end smallexample + +In contrast, with @code{/r} the bytes of the instruction are displayed +in the instruction order, for RISC-V this means that the bytes have been +swapped to little-endian order: + +@smallexample +(@value{GDBP}) disassemble /r 0x00010150,0x0001015c +Dump of assembler code from 0x10150 to 0x1015c: + 0x00010150 : dc22 sw s0,56(sp) + 0x00010152 : 0080 addi s0,sp,64 + 0x00010154 : fea42623 sw a0,-20(s0) + 0x00010158 : feb42423 sw a1,-24(s0) +End of assembler dump. +@end smallexample + Here is an example showing mixed source+assembly for Intel x86 with @code{/m} or @code{/s}, when the program is stopped just after function prologue in a non-optimized function with no inline code. diff --git a/gdb/mi/mi-cmd-disas.c b/gdb/mi/mi-cmd-disas.c index 387c4900150..c8e06cd940a 100644 --- a/gdb/mi/mi-cmd-disas.c +++ b/gdb/mi/mi-cmd-disas.c @@ -165,16 +165,16 @@ mi_cmd_disassemble (const char *command, char **argv, int argc) disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED; break; case 2: - disasm_flags |= DISASSEMBLY_RAW_INSN; + disasm_flags |= DISASSEMBLY_RAW_BYTES; break; case 3: - disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED | DISASSEMBLY_RAW_INSN; + disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED | DISASSEMBLY_RAW_BYTES; break; case 4: disasm_flags |= DISASSEMBLY_SOURCE; break; case 5: - disasm_flags |= DISASSEMBLY_SOURCE | DISASSEMBLY_RAW_INSN; + disasm_flags |= DISASSEMBLY_SOURCE | DISASSEMBLY_RAW_BYTES; break; default: gdb_assert_not_reached ("bad disassembly mode"); diff --git a/gdb/record.c b/gdb/record.c index 17a5df262bd..2390a58f9c0 100644 --- a/gdb/record.c +++ b/gdb/record.c @@ -494,6 +494,9 @@ get_insn_history_modifiers (const char **arg) case 'r': modifiers |= DISASSEMBLY_RAW_INSN; break; + case 'b': + modifiers |= DISASSEMBLY_RAW_BYTES; + break; case 'f': modifiers |= DISASSEMBLY_OMIT_FNAME; break; diff --git a/gdb/testsuite/gdb.mi/mi-disassemble.exp b/gdb/testsuite/gdb.mi/mi-disassemble.exp index b7c52472c84..ef3337d4062 100644 --- a/gdb/testsuite/gdb.mi/mi-disassemble.exp +++ b/gdb/testsuite/gdb.mi/mi-disassemble.exp @@ -245,12 +245,12 @@ proc test_disassembly_opcode_format {} { # then disassemble using the MI command. set longest_insn_bytes "" set longest_insn_addr "" - gdb_test_multiple "disassemble /r main" "" { - -re "^disassemble /r main\r\n" { + gdb_test_multiple "disassemble /b main" "" { + -re "^disassemble /b main\r\n" { exp_continue } - -re "^&\"disassemble /r main.n\"\r\n" { + -re "^&\"disassemble /b main.n\"\r\n" { exp_continue } -- 2.30.2