dynamic.py: move logic around to allow symbol access more easily (#346)
author Andreas Ziegler <andreas.ziegler@fau.de>
Tue, 12 Jan 2021 15:03:47 +0000 (16:03 +0100)
committer GitHub <noreply@github.com>
Tue, 12 Jan 2021 15:03:47 +0000 (07:03 -0800)
So far, the implementation of num_symbols() and get_symbol()
in the DynamicSegment class depended on iter_symbols().
However, most of iter_symbols() is actually about
determining the number of symbols. Let's move that logic to
the correct method and use it in iter_symbols().

Additionally, in an ELF file without any exported symbols,
the hash table will be empty and will thus yield too low a
number of symbols. However, a loader might still need to
access the imported symbols (which also have an entry in
the symbol table, with st_shndx set to SHN_UNDEF). To allow
this, make get_symbol() take any index and simply read the
symbol data from the corresponding index, and use
get_symbol() from iter_symbols(). This way, one can for
example use symbol index information from relocation entries
to directly access the symbol data.

These changes also make the logic in DynamicSegment resemble
the code in SymbolTableSection more closely.

Fixes: #342
elftools/elf/dynamic.py
test/test_dynamic.py

index f03c6b33ca5539f6ffa7dd842b458de3e470a50d..2f85333e484c0ff339b6d06a69b91bb808417901 100644 (file)
@@ -252,80 +252,46 @@ class DynamicSegment(Segment, Dynamic):
         Segment.__init__(self, header, stream)
         Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'],
              self['p_filesz'] == 0)
-        self._symbol_list = None
+        self._symbol_size = self.elfstructs.Elf_Sym.sizeof()
+        self._num_symbols = None
         self._symbol_name_map = None
 
     def num_symbols(self):
         """ Number of symbols in the table recovered from DT_SYMTAB
         """
-        if self._symbol_list is None:
-            self._symbol_list = list(self.iter_symbols())
-        return len(self._symbol_list)
-
-    def get_symbol(self, index):
-        """ Get the symbol at index #index from the table (Symbol object)
-        """
-        if self._symbol_list is None:
-            self._symbol_list = list(self.iter_symbols())
-        return self._symbol_list[index]
-
-    def get_symbol_by_name(self, name):
-        """ Get a symbol(s) by name. Return None if no symbol by the given name
-            exists.
-        """
-        # The first time this method is called, construct a name to number
-        # mapping
-        #
-        if self._symbol_name_map is None:
-            self._symbol_name_map = defaultdict(list)
-            for i, sym in enumerate(self.iter_symbols()):
-                self._symbol_name_map[sym.name].append(i)
-        symnums = self._symbol_name_map.get(name)
-        return [self.get_symbol(i) for i in symnums] if symnums else None
-
-    def iter_symbols(self):
-        """ Yield all symbols in this dynamic segment. The symbols are usually
-            the same as returned by SymbolTableSection.iter_symbols. However,
-            in stripped binaries, SymbolTableSection might have been removed.
-            This method reads from the mandatory dynamic tag DT_SYMTAB.
-        """
-        tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
-        if tab_ptr is None or tab_offset is None:
-            raise ELFError('Segment does not contain DT_SYMTAB.')
-
-        symbol_size = self.elfstructs.Elf_Sym.sizeof()
-
-        end_ptr = None
+        if self._num_symbols is not None:
+            return self._num_symbols
 
         # Check if a DT_GNU_HASH tag exists and recover the number of symbols
         # from the corresponding hash table
         _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH')
         if gnu_hash_offset is not None:
             hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self)
-            end_ptr = tab_ptr + \
-                hash_section.get_number_of_symbols() * symbol_size
+            self._num_symbols = hash_section.get_number_of_symbols()
 
         # If DT_GNU_HASH did not exist, maybe we can use DT_HASH
-        if end_ptr is None:
+        if self._num_symbols is None:
             _, hash_offset = self.get_table_offset('DT_HASH')
             if hash_offset is not None:
                 # Get the hash table from the DT_HASH offset
                 hash_section = ELFHashTable(self.elffile, hash_offset, self)
-                end_ptr = tab_ptr + \
-                    hash_section.get_number_of_symbols() * symbol_size
+                self._num_symbols = hash_section.get_number_of_symbols()
 
-        if end_ptr is None:
+        if self._num_symbols is None:
             # Find closest higher pointer than tab_ptr. We'll use that to mark
             # the end of the symbol table.
+            tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
+            if tab_ptr is None or tab_offset is None:
+                raise ELFError('Segment does not contain DT_SYMTAB.')
             nearest_ptr = None
             for tag in self.iter_tags():
                 tag_ptr = tag['d_ptr']
                 if tag['d_tag'] == 'DT_SYMENT':
-                    if symbol_size != tag['d_val']:
+                    if self._symbol_size != tag['d_val']:
                         # DT_SYMENT is the size of one symbol entry. It must be
                         # the same as returned by Elf_Sym.sizeof.
                         raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' %
-                                    (tag['d_val'], symbol_size))
+                                       (tag['d_val'], self._symbol_size))
                 if (tag_ptr > tab_ptr and
                         (nearest_ptr is None or nearest_ptr > tag_ptr)):
                     nearest_ptr = tag_ptr
@@ -338,13 +304,49 @@ class DynamicSegment(Segment, Dynamic):
                         nearest_ptr = segment['p_vaddr'] + segment['p_filesz']
 
             end_ptr = nearest_ptr
+            self._num_symbols = (end_ptr - tab_ptr) // self._symbol_size
 
-        if end_ptr is None:
+        if self._num_symbols is None:
             raise ELFError('Cannot determine the end of DT_SYMTAB.')
 
+        return self._num_symbols
+
+    def get_symbol(self, index):
+        """ Get the symbol at index #index from the table (Symbol object)
+        """
+        tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
+        if tab_ptr is None or tab_offset is None:
+            raise ELFError('Segment does not contain DT_SYMTAB.')
+
+        symbol = struct_parse(
+            self.elfstructs.Elf_Sym,
+            self._stream,
+            stream_pos=tab_offset + index * self._symbol_size)
+
         string_table = self._get_stringtable()
-        for i in range((end_ptr - tab_ptr) // symbol_size):
-            symbol = struct_parse(self.elfstructs.Elf_Sym, self._stream,
-                                  i * symbol_size + tab_offset)
-            symbol_name = string_table.get_string(symbol['st_name'])
-            yield Symbol(symbol, symbol_name)
+        symbol_name = string_table.get_string(symbol["st_name"])
+
+        return Symbol(symbol, symbol_name)
+
+    def get_symbol_by_name(self, name):
+        """ Get a symbol(s) by name. Return None if no symbol by the given name
+            exists.
+        """
+        # The first time this method is called, construct a name to number
+        # mapping
+        #
+        if self._symbol_name_map is None:
+            self._symbol_name_map = defaultdict(list)
+            for i, sym in enumerate(self.iter_symbols()):
+                self._symbol_name_map[sym.name].append(i)
+        symnums = self._symbol_name_map.get(name)
+        return [self.get_symbol(i) for i in symnums] if symnums else None
+
+    def iter_symbols(self):
+        """ Yield all symbols in this dynamic segment. The symbols are usually
+            the same as returned by SymbolTableSection.iter_symbols. However,
+            in stripped binaries, SymbolTableSection might have been removed.
+            This method reads from the mandatory dynamic tag DT_SYMTAB.
+        """
+        for i in range(self.num_symbols()):
+            yield(self.get_symbol(i))
index 1f48362e61bb7f30027734227fd151133b45e2bb..a310d8aee8d33b0fb6375698ab13679af806c88f 100644 (file)
@@ -74,7 +74,6 @@ class TestDynamic(unittest.TestCase):
         self.assertEqual(symbol_names, exp)
         self.assertEqual(symbol_at_index_3.name, 'abort')
         self.assertIsNotNone(symbols_abort)
-        self.assertEqual(symbols_abort[0], symbol_at_index_3)
 
     def test_reading_symbols_gnu_hash(self):
         """ Verify we can read symbol table without SymbolTableSection but with
@@ -98,7 +97,6 @@ class TestDynamic(unittest.TestCase):
         self.assertEqual(symbol_names[:9], exp)
         self.assertEqual(symbol_at_index_3.name, '__register_atfork')
         self.assertIsNotNone(symbols_atfork)
-        self.assertEqual(symbols_atfork[0], symbol_at_index_3)
 
     def test_sunw_tags(self):
         def extract_sunw(filename):