support parsing of dynamic ELFs w/out section headers
author Mike Frysinger <vapier@gentoo.org>
Sun, 9 Jun 2013 22:42:40 +0000 (18:42 -0400)
committer Mike Frysinger <vapier@gentoo.org>
Sun, 9 Jun 2013 22:42:40 +0000 (18:42 -0400)
At runtime, ELFs do not use the section headers at all. Instead, only
the program segments and dynamic tags get used. This means you can
strip the section table completely from an ELF and have it still work.

In practice, people rarely do this, but it's not unheard of. Make the
dynamic tags work even in these cases by loading the string table the
same way the runtime loader does:
 * parse the string table (strtab) address from DT_STRTAB
 * locate the file offset via the program segments

In order to avoid a circular dependency (building a parsed dyntag needs the
string table, but locating the string table requires walking the dyntags),
add a set of internal funcs that return the raw, unparsed tag entries.

You can see this in action:
$ eu-strip -g --strip-sections a.out
$ readelf -S a.out
<nothing>
$ lddtree.py ./a.out
a.out (interpreter => /lib64/ld-linux-x86-64.so.2)
    libïäöëß.so => None
    libc.so.6 => /lib64/libc.so.6

elftools/elf/dynamic.py
test/test_dynamic.py
test/testfiles_for_unittests/aarch64_super_stripped.elf [new file with mode: 0755]

index d9db870380b760b19275db5da80e2f46106d0bd0..9f985c200376653792fe3deabe0d49e32cbb01fd 100644 (file)
@@ -11,7 +11,22 @@ import itertools
 from .sections import Section
 from .segments import Segment
 from ..common.exceptions import ELFError
-from ..common.utils import struct_parse
+from ..common.utils import struct_parse, parse_cstring_from_stream
+
+
+class _DynamicStringTable(object):
+    """ Bare string table based on values found via ELF dynamic tags and
+        loadable segments only.  Good enough for get_string() only.
+    """
+    def __init__(self, stream, table_offset):
+        self._stream = stream
+        self._table_offset = table_offset
+
+    def get_string(self, offset):
+        """ Get the string stored at the given offset in this string table.
+        """
+        return parse_cstring_from_stream(self._stream,
+                                         self._table_offset + offset)
 
 
 class DynamicTag(object):
@@ -61,27 +76,68 @@ class Dynamic(object):
         self._num_tags = -1
         self._offset = position
         self._tagsize = self._elfstructs.Elf_Dyn.sizeof()
+
+        # Do not access this directly yourself; use _get_stringtable() instead.
         self._stringtable = stringtable
 
-    def iter_tags(self, type=None):
-        """ Yield all tags (limit to |type| if specified)
+    def _get_stringtable(self):
+        """ Return a string table for looking up dynamic tag related strings.
+
+            This won't be a "full" string table object, but will at least
+            support the get_string() function.
+        """
+        if self._stringtable:
+            return self._stringtable
+
+        # If the ELF has stripped its section table (which is unusual, but
+        # perfectly valid), we need to use the dynamic tags to locate the
+        # dynamic string table.
+        strtab = None
+        for tag in self._iter_tags(type='DT_STRTAB'):
+            strtab = tag['d_val']
+            break
+        # If we found a dynamic string table, locate the offset in the file
+        # by using the program headers.
+        if strtab:
+            table_offset = next(self._elffile.address_offsets(strtab), None)
+            if table_offset is not None:
+                self._stringtable = _DynamicStringTable(self._stream, table_offset)
+                return self._stringtable
+
+        # That didn't work for some reason.  Let's use the section header
+        # even though this ELF is super weird.
+        self._stringtable = self._elffile.get_section_by_name(b'.dynstr')
+        return self._stringtable
+
+    def _iter_tags(self, type=None):
+        """ Yield all raw tags (limit to |type| if specified)
         """
         for n in itertools.count():
-            tag = self.get_tag(n)
-            if type is None or tag.entry.d_tag == type:
+            tag = self._get_tag(n)
+            if type is None or tag['d_tag'] == type:
                 yield tag
-            if tag.entry.d_tag == 'DT_NULL':
+            if tag['d_tag'] == 'DT_NULL':
                 break
 
-    def get_tag(self, n):
-        """ Get the tag at index #n from the file (DynamicTag object)
+    def iter_tags(self, type=None):
+        """ Yield all tags (limit to |type| if specified)
+        """
+        for tag in self._iter_tags(type=type):
+            yield DynamicTag(tag, self._get_stringtable())
+
+    def _get_tag(self, n):
+        """ Get the raw tag at index #n from the file
         """
         offset = self._offset + n * self._tagsize
-        entry = struct_parse(
+        return struct_parse(
             self._elfstructs.Elf_Dyn,
             self._stream,
             stream_pos=offset)
-        return DynamicTag(entry, self._stringtable)
+
+    def get_tag(self, n):
+        """ Get the tag at index #n from the file (DynamicTag object)
+        """
+        return DynamicTag(self._get_tag(n), self._get_stringtable())
 
     def num_tags(self):
         """ Number of dynamic tags in the file
index 0ee9b358a45e01cc62c1e78b747b68b45e2cf5fa..f25febafff33b5c7ab59da1a15238f0f513121c8 100644 (file)
@@ -10,16 +10,42 @@ except ImportError:
     import unittest
 import os
 
-from utils import setup_syspath; setup_syspath()
+from utils import setup_syspath
+setup_syspath()
+from elftools.elf.elffile import ELFFile
 from elftools.common.exceptions import ELFError
 from elftools.elf.dynamic import DynamicTag
 
 
 class TestDynamicTag(unittest.TestCase):
+    """Tests for the DynamicTag class."""
+
     def test_requires_stringtable(self):
         with self.assertRaises(ELFError):
             dt = DynamicTag('', None)
 
 
+class TestDynamic(unittest.TestCase):
+    """Tests for the Dynamic class."""
+
+    def test_missing_sections(self):
+        """Verify we can get dynamic strings w/out section headers"""
+
+        libs = []
+        with open(os.path.join('test', 'testfiles_for_unittests',
+                               'aarch64_super_stripped.elf'), 'rb') as f:
+            elf = ELFFile(f)
+            for segment in elf.iter_segments():
+                if segment.header.p_type != 'PT_DYNAMIC':
+                    continue
+
+                for t in segment.iter_tags():
+                    if t.entry.d_tag == 'DT_NEEDED':
+                        libs.append(t.needed.decode('utf-8'))
+
+        exp = ['libc.so.6']
+        self.assertEqual(libs, exp)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/testfiles_for_unittests/aarch64_super_stripped.elf b/test/testfiles_for_unittests/aarch64_super_stripped.elf
new file mode 100755 (executable)
index 0000000..0e5c2c4
Binary files /dev/null and b/test/testfiles_for_unittests/aarch64_super_stripped.elf differ