Support parsing symbol table in dynamic segment.
authorNam T. Nguyen <namnguyen@chromium.org>
Fri, 25 Jul 2014 18:41:53 +0000 (11:41 -0700)
committerNam T. Nguyen <namnguyen@chromium.org>
Tue, 29 Jul 2014 16:04:40 +0000 (09:04 -0700)
Unlike SymbolTableSection, this patch reads from DT_SYMTAB, and
DT_STRTAB in PT_DYNAMIC segment. It heuristically determines the end of
DT_SYMTAB by finding the next higher pointer in the same segment. GNU
libc (dl-addr.c) assumes that DT_STRTAB comes after DT_SYMTAB.

elftools/elf/dynamic.py
test/test_dynamic.py

index 9f985c200376653792fe3deabe0d49e32cbb01fd..0b3e3e99b9c110c47f210065a24d71f76e934ef1 100644 (file)
@@ -8,7 +8,7 @@
 #-------------------------------------------------------------------------------
 import itertools
 
-from .sections import Section
+from .sections import Section, Symbol
 from .segments import Segment
 from ..common.exceptions import ELFError
 from ..common.utils import struct_parse, parse_cstring_from_stream
@@ -80,6 +80,22 @@ class Dynamic(object):
         # Do not access this directly yourself; use _get_stringtable() instead.
         self._stringtable = stringtable
 
+    def get_table_offset(self, tag_name):
+        """ Return the virtual address and file offset of a dynamic table.
+        """
+        ptr = None
+        for tag in self._iter_tags(type=tag_name):
+            ptr = tag['d_ptr']
+            break
+
+        # If we found a virtual address, locate the offset in the file
+        # by using the program headers.
+        offset = None
+        if ptr:
+            offset = next(self._elffile.address_offsets(ptr), None)
+
+        return ptr, offset
+
     def _get_stringtable(self):
         """ Return a string table for looking up dynamic tag related strings.
 
@@ -92,17 +108,10 @@ class Dynamic(object):
         # If the ELF has stripped its section table (which is unusual, but
         # perfectly valid), we need to use the dynamic tags to locate the
         # dynamic string table.
-        strtab = None
-        for tag in self._iter_tags(type='DT_STRTAB'):
-            strtab = tag['d_val']
-            break
-        # If we found a dynamic string table, locate the offset in the file
-        # by using the program headers.
-        if strtab:
-            table_offset = next(self._elffile.address_offsets(strtab), None)
-            if table_offset is not None:
-                self._stringtable = _DynamicStringTable(self._stream, table_offset)
-                return self._stringtable
+        _, table_offset = self.get_table_offset('DT_STRTAB')
+        if table_offset is not None:
+            self._stringtable = _DynamicStringTable(self._stream, table_offset)
+            return self._stringtable
 
         # That didn't work for some reason.  Let's use the section header
         # even though this ELF is super weird.
@@ -179,3 +188,47 @@ class DynamicSegment(Segment, Dynamic):
                 break
         Segment.__init__(self, header, stream)
         Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'])
+
+    def iter_symbols(self):
+        """ Yield all symbols in this dynamic segment. The symbols are usually
+            the same as returned by SymbolTableSection.iter_symbols. However,
+            in stripped binaries, SymbolTableSection might have been removed.
+            This method reads from the mandatory dynamic tag DT_SYMTAB.
+        """
+        tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
+        if tab_ptr is None or tab_offset is None:
+            raise ELFError('Segment does not contain DT_SYMTAB.')
+
+        symbol_size = self._elfstructs.Elf_Sym.sizeof()
+
+        # Find closest higher pointer than tab_ptr. We'll use that to mark the
+        # end of the symbol table.
+        nearest_ptr = None
+        for tag in self.iter_tags():
+            tag_ptr = tag['d_ptr']
+            if tag['d_tag'] == 'DT_SYMENT':
+                if symbol_size != tag['d_val']:
+                    # DT_SYMENT is the size of one symbol entry. It must be the
+                    # same as returned by Elf_Sym.sizeof.
+                    raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' %
+                                   (tag['d_val'], symbol_size))
+            if (tag_ptr > tab_ptr and
+                    (nearest_ptr is None or nearest_ptr > tag_ptr)):
+                nearest_ptr = tag_ptr
+
+        if nearest_ptr is None:
+            # Use the end of segment that contains DT_SYMTAB.
+            for segment in self._elffile.iter_segments():
+                if (segment['p_vaddr'] <= tab_ptr and
+                        tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])):
+                    nearest_ptr = segment['p_vaddr'] + segment['p_filesz']
+
+        if nearest_ptr is None:
+            raise ELFError('Cannot determine the end of DT_SYMTAB.')
+
+        string_table = self._get_stringtable()
+        for i in range((nearest_ptr - tab_ptr) // symbol_size):
+            symbol = struct_parse(self._elfstructs.Elf_Sym, self._stream,
+                                  i * symbol_size + tab_offset)
+            symbol_name = string_table.get_string(symbol['st_name'])
+            yield Symbol(symbol, symbol_name)
index f25febafff33b5c7ab59da1a15238f0f513121c8..8dc6c28362b3cd578da6ec737a61097f077135bc 100644 (file)
@@ -46,6 +46,20 @@ class TestDynamic(unittest.TestCase):
         exp = ['libc.so.6']
         self.assertEqual(libs, exp)
 
+    def test_reading_symbols(self):
+        """Verify we can read symbol table without SymbolTableSection"""
+        with open(os.path.join('test', 'testfiles_for_unittests',
+                               'aarch64_super_stripped.elf'), 'rb') as f:
+            elf = ELFFile(f)
+            for segment in elf.iter_segments():
+                if segment.header.p_type != 'PT_DYNAMIC':
+                    continue
+
+                symbol_names = [x.name for x in segment.iter_symbols()]
+
+        exp = [b'', b'__libc_start_main', b'__gmon_start__', b'abort']
+        self.assertEqual(symbol_names, exp)
+
 
 if __name__ == '__main__':
     unittest.main()