Update readelf to v2.37, adapt readelf.py output and tests (#387)
author: Marco Bonelli <marco@mebeim.net>
Fri, 10 Dec 2021 14:36:18 +0000 (15:36 +0100)
committer: GitHub <noreply@github.com>
Fri, 10 Dec 2021 14:36:18 +0000 (06:36 -0800)
Changes to conform the output of readelf.py to binutils readelf v2.37:

- Use singular "entry" when needed instead of "entries".

- Output the last entry for the .debug_line output table when
  DW_LNE_end_sequence is encountered, as the DWARF standard dictates. It
  looks like this was a readelf bug which was fixed in commit
  ba8826a82a29a19b78c18ce4f44fe313de279af7 of the GNU binutils-gdb repo.

- Add additional "Stmt" field in the .debug_line output table, and
  ignore the new "View" field. The "Stmt" field has been implemented in
  readelf.py. The "View" field is not something that the DWARF standard
  defines, it's an internal register added to the line number
  information state machine by binutils to perform assembler checks (see
  commit ba8826a82a29a19b78c18ce4f44fe313de279af7 of GNU binutils-gdb
  repo for more info, in particular gas/doc/as.texinfo). "View" is
  unimplemented in pyelftools for now and a special case has been added
  in the readelf test suite to ignore it.

- Add support for printing section names when dumping .symtab entries of
  st_type STT_SECTION as readelf v2.37 does (see commit
  23356397449a8aa65afead0a895a20be53b3c6b0 of GNU binutils-gdb repo).

- Add support for recognizing SOs specifically tagged as PIE (DT_FLAGS_1
  dynamic tag with DF_1_PIE set). In such a case, describe the file as
  "Position-Independent Executable file" instead of "Shared object
  file", as readelf v2.37 does.

- Add leading "0x" for version section addresses when dumping version
   information (-V) as readelf does.

- Ignore "D (mbind)" in section headers flags legend (pyelftools does
  not output this flag).

Special cases ADDED for run_readelf_tests.py:

- Ignore "View" column for --debug-dump=decodedline in readelf's output.
- Ignore ellipsis ("[...]") for long names/symbols/paths in readelf's
  output.

Special cases REMOVED for run_readelf_tests.py:

- Detection of additional '@' after symbol names (flag_after_symtable)
  seems to no longer be needed as all tests pass without this exception.
- Special case for DW_AT_apple_xxx seems to no longer be needed, readelf
  now recognizes those.
- Special case for PT_GNU_PROPERTY no longer needed, readelf now
  recognizes it.

Other changes:

- Add missing import in elftools/dwarf/lineprogram.py.

References:

- GNU binutils-gdb repo: https://sourceware.org/git/?p=binutils-gdb.git

elftools/dwarf/lineprogram.py
elftools/elf/descriptions.py
scripts/readelf.py
test/external_tools/readelf
test/run_readelf_tests.py

index ce69d68590ee3b537fa8b2fc0f40f15486b4f7f3..dbde7bafdba7f5ebf2b6df532b87a63cc901f859 100644 (file)
@@ -10,7 +10,7 @@ import os
 import copy
 from collections import namedtuple
 
-from ..common.utils import struct_parse
+from ..common.utils import struct_parse, dwarf_assert
 from .constants import *
 
 
@@ -178,6 +178,7 @@ class LineProgram(object):
 
                 if ex_opcode == DW_LNE_end_sequence:
                     state.end_sequence = True
+                    state.is_stmt = 0
                     add_entry_new_state(ex_opcode, [], is_extended=True)
                     # reset state
                     state = LineState(self.header['default_is_stmt'])
index 27e23bed143146a3100ae7471d544f2f4f3f4cf4..d15cbe1e982e1ba19400f4f253cc7bd29a01bd0e 100644 (file)
@@ -35,7 +35,13 @@ def describe_ei_osabi(x):
     return _DESCR_EI_OSABI.get(x, _unknown)
 
 
-def describe_e_type(x):
+def describe_e_type(x, elffile=None):
+    if elffile is not None and x == 'ET_DYN':
+        # Detect whether this is a normal SO or a PIE executable
+        dynamic = elffile.get_section_by_name('.dynamic')
+        for t in dynamic.iter_tags('DT_FLAGS_1'):
+            if t.entry.d_val & ENUM_DT_FLAGS_1['DF_1_PIE']:
+                return 'DYN (Position-Independent Executable file)'
     return _DESCR_E_TYPE.get(x, _unknown)
 
 
index 80d565050fbf463031dfc56563275e688d2c75fa..2ddd02ba26871f4a0e2341aa6be07f01160b5aa8 100755 (executable)
@@ -108,7 +108,7 @@ class ReadElf(object):
         self._emitline('  ABI Version:                       %d' %
                 e_ident['EI_ABIVERSION'])
         self._emitline('  Type:                              %s' %
-                describe_e_type(header['e_type']))
+                describe_e_type(header['e_type'], self.elffile))
         self._emitline('  Machine:                           %s' %
                 describe_e_machine(header['e_machine']))
         self._emitline('  Version:                           %s' %
@@ -230,7 +230,7 @@ class ReadElf(object):
         elfheader = self.elffile.header
         if show_heading:
             self._emitline('Elf file type is %s' %
-                describe_e_type(elfheader['e_type']))
+                describe_e_type(elfheader['e_type'], self.elffile))
             self._emitline('Entry point is %s' %
                 self._format_hex(elfheader['e_entry']))
             # readelf weirness - why isn't e_phoff printed as hex? (for section
@@ -391,8 +391,10 @@ class ReadElf(object):
                     section.name))
                 continue
 
-            self._emitline("\nSymbol table '%s' contains %s entries:" % (
-                section.name, section.num_symbols()))
+            self._emitline("\nSymbol table '%s' contains %d %s:" % (
+                section.name,
+                section.num_symbols(),
+                'entry' if section.num_symbols() == 1 else 'entries'))
 
             if self.elffile.elfclass == 32:
                 self._emitline('   Num:    Value  Size Type    Bind   Vis      Ndx Name')
@@ -418,6 +420,13 @@ class ReadElf(object):
                             else:
                                 version_info = '@@%(name)s' % version
 
+                symbol_name = symbol.name
+                # Print section names for STT_SECTION symbols as readelf does
+                if (symbol['st_info']['type'] == 'STT_SECTION'
+                    and symbol['st_shndx'] < self.elffile.num_sections()
+                    and symbol['st_name'] == 0):
+                    symbol_name = self.elffile.get_section(symbol['st_shndx']).name
+
                 # symbol names are truncated to 25 chars, similarly to readelf
                 self._emitline('%6d: %s %s %-7s %-6s %-7s %4s %.25s%s' % (
                     nsym,
@@ -430,7 +439,7 @@ class ReadElf(object):
                     describe_symbol_shndx(self._get_symbol_shndx(symbol,
                                                                  nsym,
                                                                  section_index)),
-                    symbol.name,
+                    symbol_name,
                     version_info))
 
     def display_dynamic_tags(self):
@@ -442,9 +451,10 @@ class ReadElf(object):
                 continue
 
             has_dynamic_sections = True
-            self._emitline("\nDynamic section at offset %s contains %s entries:" % (
+            self._emitline("\nDynamic section at offset %s contains %d %s:" % (
                 self._format_hex(section['sh_offset']),
-                section.num_tags()))
+                section.num_tags(),
+                'entry' if section.num_tags() == 1 else 'entries'))
             self._emitline("  Tag        Type                         Name/Value")
 
             padding = 20 + (8 if self.elffile.elfclass == 32 else 0)
@@ -510,10 +520,11 @@ class ReadElf(object):
                 continue
 
             has_relocation_sections = True
-            self._emitline("\nRelocation section '%.128s' at offset %s contains %s entries:" % (
+            self._emitline("\nRelocation section '%.128s' at offset %s contains %d %s:" % (
                 section.name,
                 self._format_hex(section['sh_offset']),
-                section.num_relocations()))
+                section.num_relocations(),
+                'entry' if section.num_relocations() == 1 else 'entries'))
             if section.is_RELA():
                 self._emitline("  Offset          Info           Type           Sym. Value    Sym. Name + Addend")
             else:
@@ -592,11 +603,11 @@ class ReadElf(object):
             return
         for ehabi_info in self.elffile.get_ehabi_infos():
             # Unwind section '.ARM.exidx' at offset 0x203e8 contains 1009 entries:
-            self._emitline("\nUnwind section '%s' at offset 0x%x contains %d entries" % (
+            self._emitline("\nUnwind section '%s' at offset 0x%x contains %d %s" % (
                 ehabi_info.section_name(),
                 ehabi_info.section_offset(),
-                ehabi_info.num_entry()
-            ))
+                ehabi_info.num_entry(),
+                'entry' if ehabi_info.num_entry() == 1 else 'entries'))
 
             for i in range(ehabi_info.num_entry()):
                 entry = ehabi_info.get_entry(i)
@@ -632,9 +643,7 @@ class ReadElf(object):
 
         for section in self.elffile.iter_sections():
             if isinstance(section, GNUVerSymSection):
-                self._print_version_section_header(
-                    section, 'Version symbols', lead0x=False)
-
+                self._print_version_section_header(section, 'Version symbols')
                 num_symbols = section.num_symbols()
 
                 # Symbol version info are printed four by four entries
@@ -903,8 +912,9 @@ class ReadElf(object):
         else:
             num_entries = version_section.num_symbols()
 
-        self._emitline("\n%s section '%s' contains %s entries:" %
-            (name, version_section.name, num_entries))
+        self._emitline("\n%s section '%s' contains %d %s:" % (
+            name, version_section.name, num_entries,
+            'entry' if num_entries == 1 else 'entries'))
         self._emitline('%sAddr: %s  Offset: %s  Link: %i (%s)' % (
             ' ' * indent,
             self._format_hex(
@@ -1106,7 +1116,8 @@ class ReadElf(object):
         """
         if not self._dwarfinfo.has_debug_info:
             return
-        self._emitline('Decoded dump of debug contents of section %s:\n' % self._dwarfinfo.debug_line_sec.name)
+        self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_line_sec.name)
+        self._emitline()
 
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
@@ -1121,7 +1132,7 @@ class ReadElf(object):
                 cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
-            self._emitline('File name                            Line number    Starting address')
+            self._emitline('File name                            Line number    Starting address    Stmt')
 
             # Print each state's file, line and address information. For some
             # instructions other output is needed to be compatible with
@@ -1143,23 +1154,19 @@ class ReadElf(object):
                     elif entry.command == DW_LNE_define_file:
                         self._emitline('%s:' % (
                             bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
-                elif not state.end_sequence:
-                    # readelf doesn't print the state after end_sequence
-                    # instructions. I think it's a bug but to be compatible
-                    # I don't print them too.
-                    if lineprogram['version'] < 4 or self.elffile['e_machine'] == 'EM_PPC64':
-                        self._emitline('%-35s  %11d  %18s' % (
-                            bytes2str(lineprogram['file_entry'][state.file - 1].name),
-                            state.line,
-                            '0' if state.address == 0 else
-                                self._format_hex(state.address)))
-                    else:
-                        self._emitline('%-35s  %11d  %18s[%d]' % (
-                            bytes2str(lineprogram['file_entry'][state.file - 1].name),
-                            state.line,
-                            '0' if state.address == 0 else
-                                self._format_hex(state.address),
-                            state.op_index))
+                elif lineprogram['version'] < 4 or self.elffile['e_machine'] == 'EM_PPC64':
+                    self._emitline('%-35s  %11s  %18s    %s' % (
+                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
+                        state.line if not state.end_sequence else '-',
+                        '0' if state.address == 0 else self._format_hex(state.address),
+                        'x' if state.is_stmt and not state.end_sequence else ''))
+                else:
+                    self._emitline('%-35s  %11d  %18s[%d] %s' % (
+                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
+                        state.line if not state.end_sequence else '-',
+                        '0' if state.address == 0 else self._format_hex(state.address),
+                        state.op_index,
+                        'x' if state.is_stmt and not state.end_sequence else ''))
                 if entry.command == DW_LNS_copy:
                     # Another readelf oddity...
                     self._emitline()
index c467972602986c21cb419b0984b363dd5650367c..96f85d89c8d099da6a2139be28a922e7040fca0f 100755 (executable)
Binary files a/test/external_tools/readelf and b/test/external_tools/readelf differ
index 96447b4ab29d1e42ffc8b792d458da1e312c8175..59a039c5e214d25fb5b98cd5e1819b26d4cfd426 100755 (executable)
@@ -130,15 +130,16 @@ def compare_output(s1, s2):
     lines1 = prepare_lines(s1)
     lines2 = prepare_lines(s2)
 
-    flag_after_symtable = False
+    flag_in_debug_line_section = False
 
     if len(lines1) != len(lines2):
         return False, 'Number of lines different: %s vs %s' % (
                 len(lines1), len(lines2))
 
     for i in range(len(lines1)):
-        if 'symbol table' in lines1[i]:
-            flag_after_symtable = True
+        if lines1[i].endswith('debug_line section:'):
+            # .debug_line or .zdebug_line
+            flag_in_debug_line_section = True
 
         # readelf spelling error for GNU property notes
         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
@@ -162,12 +163,32 @@ def compare_output(s1, s2):
             sm = SequenceMatcher()
             sm.set_seqs(lines1[i], lines2[i])
             changes = sm.get_opcodes()
-            if flag_after_symtable:
-                # Detect readelf's adding @ with lib and version after
-                # symbol name.
-                if (    len(changes) == 2 and changes[1][0] == 'delete' and
-                        lines1[i][changes[1][1]] == '@'):
+            if flag_in_debug_line_section:
+                # readelf outputs an additional "View" column: ignore it
+                if len(lines1_parts) >= 2 and lines1_parts[-2] == 'view':
                     ok = True
+                else:
+                    # Fast check special-cased for the only ELF we have which
+                    # has this information (dwarf_gnuops4.so.elf)
+                    ok = (    lines1_parts[-2:] == ['1', 'x']
+                          and lines2_parts[-1] == 'x')
+            elif '[...]' in lines1[i]:
+                # Special case truncations with ellipsis like these:
+                #     .note.gnu.bu[...]        redelf
+                #     .note.gnu.build-i        pyelftools
+                # Or more complex for symbols with versions, like these:
+                #     _unw[...]@gcc_3.0        readelf
+                #     _unwind_resume@gcc_3.0   pyelftools
+                for p1, p2 in zip(lines1_parts, lines2_parts):
+                    dots_start = p1.find('[...]')
+                    if dots_start != -1:
+                        break
+                ok = p1.endswith('[...]') and p1[:dots_start] == p2[:dots_start]
+                if not ok:
+                    dots_end = dots_start + 5
+                    if len(p1) > dots_end and p1[dots_end] == '@':
+                        ok = (    p1[:dots_start] == p2[:dots_start]
+                              and p1[p1.rfind('@'):] == p2[p2.rfind('@'):])
             elif 'at_const_value' in lines1[i]:
                 # On 32-bit machines, readelf doesn't correctly represent
                 # some boundary LEB128 numbers
@@ -178,17 +199,11 @@ def compare_output(s1, s2):
             elif 'os/abi' in lines1[i]:
                 if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]:
                     ok = True
-            elif (  'unknown at value' in lines1[i] and
-                    'dw_at_apple' in lines2[i]):
-                ok = True
-            elif 'loos+0x474e553' in lines1[i]:
-                # readelf v2.29 does not know about PT_GNU_PROPERTY apparently
-                ok = lines2_parts[0] == 'gnu_property'
             elif len(lines1_parts) == 3 and lines1_parts[2] == 'nt_gnu_property_type_0':
                 # readelf does not seem to print a readable description for this
                 ok = lines1_parts == lines2_parts[:3]
             else:
-                for s in ('t (tls)', 'l (large)'):
+                for s in ('t (tls)', 'l (large)', 'd (mbind)'):
                     if s in lines1[i] or s in lines2[i]:
                         ok = True
                         break