Add support for ARM exception handler ABI (#328)
author LeadroyaL <LeadroyaL@users.noreply.github.com>
Wed, 19 Aug 2020 16:35:12 +0000 (00:35 +0800)
committer GitHub <noreply@github.com>
Wed, 19 Aug 2020 16:35:12 +0000 (09:35 -0700)
14 files changed:
elftools/ehabi/__init__.py [new file with mode: 0644]
elftools/ehabi/constants.py [new file with mode: 0644]
elftools/ehabi/decoder.py [new file with mode: 0644]
elftools/ehabi/ehabiinfo.py [new file with mode: 0644]
elftools/ehabi/structs.py [new file with mode: 0644]
elftools/elf/elffile.py
scripts/readelf.py
setup.py
test/test_ehabi_decoder.py [new file with mode: 0644]
test/test_ehabi_elf.py [new file with mode: 0644]
test/testfiles_for_unittests/arm_exidx_test.cpp [new file with mode: 0644]
test/testfiles_for_unittests/arm_exidx_test.elf [new file with mode: 0644]
test/testfiles_for_unittests/arm_exidx_test.o [new file with mode: 0644]
test/testfiles_for_unittests/arm_exidx_test.so [new file with mode: 0755]

diff --git a/elftools/ehabi/__init__.py b/elftools/ehabi/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/elftools/ehabi/constants.py b/elftools/ehabi/constants.py
new file mode 100644 (file)
index 0000000..2921b97
--- /dev/null
@@ -0,0 +1 @@
+EHABI_INDEX_ENTRY_SIZE = 8  # size in bytes of one .ARM.exidx index entry (two 32-bit words)
diff --git a/elftools/ehabi/decoder.py b/elftools/ehabi/decoder.py
new file mode 100644 (file)
index 0000000..ce20f65
--- /dev/null
@@ -0,0 +1,284 @@
+# -------------------------------------------------------------------------------
+# elftools: ehabi/decoder.py
+#
+# Decode ARM exception handler bytecode.
+#
+# LeadroyaL (leadroyal@qq.com)
+# This code is in the public domain
+# -------------------------------------------------------------------------------
+from collections import namedtuple
+
+
+class EHABIBytecodeDecoder(object):
+    """ Decoder of a sequence of ARM exception handler abi bytecode.
+
+        Reference:
+        https://github.com/llvm/llvm-project/blob/master/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+        https://developer.arm.com/documentation/ihi0038/b/
+
+        Accessible attributes:
+
+            mnemonic_array:
+                MnemonicItem array, one item per decoded instruction.
+
+        Parameters:
+
+            bytecode_array:
+                Integer array, raw data of bytecode.
+
+        Each _decode_* handler is named after the bit pattern it decodes.
+        It consumes the instruction found at self._index (one or more bytes),
+        leaves self._index just past it and returns the mnemonic text.
+        An instruction whose operand bytes are missing at the end of a list
+        input raises IndexError.
+    """
+
+    def __init__(self, bytecode_array):
+        self._bytecode_array = bytecode_array
+        self._index = None
+        self.mnemonic_array = None
+        self._decode()
+
+    def _decode(self):
+        """ Decode bytecode array, put result into mnemonic_array.
+        """
+        self._index = 0
+        self.mnemonic_array = []
+        while self._index < len(self._bytecode_array):
+            opcode = self._bytecode_array[self._index]
+            # Where patterns overlap, ring lists the more specific one first,
+            # so the first match is the decoder to use.
+            for mask, value, handler in self.ring:
+                if (opcode & mask) == value:
+                    start_idx = self._index
+                    mnemonic = handler(self)
+                    end_idx = self._index
+                    self.mnemonic_array.append(
+                        MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic))
+                    break
+
+    def _read_byte(self):
+        """ Return the byte at the current position and step past it.
+        """
+        byte = self._bytecode_array[self._index]
+        self._index += 1
+        return byte
+
+    def _decode_00xxxxxx(self):
+        """ 00xxxxxx: vsp = vsp + ((xxxxxx << 2) + 4)
+        """
+        opcode = self._read_byte()
+        return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4)
+
+    def _decode_01xxxxxx(self):
+        """ 01xxxxxx: vsp = vsp - ((xxxxxx << 2) + 4)
+        """
+        opcode = self._read_byte()
+        return 'vsp = vsp - %u' % (((opcode & 0x3f) << 2) + 4)
+
+    gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+                          "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc")
+
+    def _calculate_range(self, start, count):
+        """ Bit mask with count + 1 consecutive bits set, lowest one at bit #start.
+        """
+        return ((1 << (count + 1)) - 1) << start
+
+    def _printGPR(self, gpr_mask):
+        """ Format the core registers selected by gpr_mask (bit i selects
+            gpr_register_names[i]) as '{r0, r1, ...}'.
+        """
+        hits = [name for bit, name in enumerate(self.gpr_register_names)
+                if gpr_mask & (1 << bit)]
+        return '{%s}' % ', '.join(hits)
+
+    def _print_registers(self, vfp_mask, prefix):
+        """ Format the registers selected by the low 32 bits of vfp_mask as
+            '{<prefix>N, ...}'; higher bits are ignored.
+        """
+        hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0]
+        return '{%s}' % ', '.join(hits)
+
+    def _decode_1000iiii_iiiiiiii(self):
+        """ 1000iiii iiiiiiii: pop the registers among r4..pc selected by the
+            12-bit mask; an all-zero mask means 'refuse to unwind'.
+        """
+        op0 = self._read_byte()
+        op1 = self._read_byte()
+        # Second byte selects r4..fp (bits 4-11), low nibble of the first
+        # byte selects ip, sp, lr, pc (bits 12-15).
+        gpr_mask = (op1 << 4) | ((op0 & 0x0f) << 12)
+        if gpr_mask == 0:
+            return 'refuse to unwind'
+        return 'pop %s' % self._printGPR(gpr_mask)
+
+    def _decode_10011101(self):
+        """ 10011101: reserved.
+        """
+        self._index += 1
+        return 'reserved (ARM MOVrr)'
+
+    def _decode_10011111(self):
+        """ 10011111: reserved.
+        """
+        self._index += 1
+        return 'reserved (WiMMX MOVrr)'
+
+    def _decode_1001nnnn(self):
+        """ 1001nnnn: vsp = r[nnnn]
+        """
+        opcode = self._read_byte()
+        return 'vsp = r%u' % (opcode & 0x0f)
+
+    def _decode_10100nnn(self):
+        """ 10100nnn: pop r4 .. r[4+nnn]
+        """
+        opcode = self._read_byte()
+        return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07))
+
+    def _decode_10101nnn(self):
+        """ 10101nnn: pop r4 .. r[4+nnn] and lr
+        """
+        opcode = self._read_byte()
+        return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14))
+
+    def _decode_10110000(self):
+        """ 10110000: finish
+        """
+        self._index += 1
+        return 'finish'
+
+    def _decode_10110001_0000iiii(self):
+        """ 10110001 0000iiii: pop the registers among r0..r3 selected by
+            mask iiii; 'spare' if the second byte is zero or has any of its
+            high four bits set.
+        """
+        self._index += 1  # skip constant byte
+        op1 = self._read_byte()
+        if (op1 & 0xf0) != 0 or op1 == 0x00:
+            return 'spare'
+        return 'pop %s' % self._printGPR(op1 & 0x0f)
+
+    def _decode_10110010_uleb128(self):
+        """ 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+        """
+        #  do { ULEB.push_back(Opcodes[OI ^ 3]); } while (Opcodes[OI++ ^ 3] & 0x80);
+        #  for (unsigned BI = 0, BE = ULEB.size(); BI != BE; ++BI)
+        #    Value = Value | ((ULEB[BI] & 0x7f) << (7 * BI));
+        self._index += 1  # skip constant byte
+        value = 0
+        shift = 0
+        while True:
+            byte = self._read_byte()
+            # Seven payload bits per byte, least significant group first.
+            value |= (byte & 0x7f) << shift
+            shift += 7
+            # A clear high bit marks the LAST byte of the operand; the bytes
+            # after it belong to the next instruction.
+            if byte & 0x80 == 0:
+                break
+        return 'vsp = vsp + %u' % (0x204 + (value << 2))
+
+    def _decode_10110011_sssscccc(self):
+        """ 10110011 sssscccc: printed exactly like 11001001 sssscccc.
+        """
+        return self._decode_11001001_sssscccc()
+
+    def _decode_101101nn(self):
+        """ 101101nn: spare
+        """
+        return self._spare()
+
+    def _decode_10111nnn(self):
+        """ 10111nnn: pop d8 .. d[8+nnn]
+        """
+        opcode = self._read_byte()
+        return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d")
+
+    def _decode_11000110_sssscccc(self):
+        """ 11000110 sssscccc: pop wR[ssss] .. wR[ssss+cccc]
+        """
+        self._index += 1  # skip constant byte
+        op1 = self._read_byte()
+        start = (op1 & 0xf0) >> 4
+        count = op1 & 0x0f
+        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "wR")
+
+    def _decode_11000111_0000iiii(self):
+        """ 11000111 0000iiii: pop the wCGR registers selected by mask iiii;
+            'spare' if the second byte is zero or has any of its high four
+            bits set.
+        """
+        self._index += 1  # skip constant byte
+        op1 = self._read_byte()
+        if (op1 & 0xf0) != 0 or op1 == 0x00:
+            return 'spare'
+        return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR")
+
+    def _decode_11001000_sssscccc(self):
+        """ 11001000 sssscccc: pop d[16+ssss] .. d[16+ssss+cccc]
+            (register numbers above 31 are not printed).
+        """
+        self._index += 1  # skip constant byte
+        op1 = self._read_byte()
+        start = 16 + ((op1 & 0xf0) >> 4)
+        count = op1 & 0x0f
+        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")
+
+    def _decode_11001001_sssscccc(self):
+        """ 11001001 sssscccc: pop d[ssss] .. d[ssss+cccc]
+        """
+        self._index += 1  # skip constant byte
+        op1 = self._read_byte()
+        start = (op1 & 0xf0) >> 4
+        count = op1 & 0x0f
+        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")
+
+    def _decode_11001yyy(self):
+        """ 11001yyy (other than 0xc8 and 0xc9, matched earlier in ring): spare
+        """
+        return self._spare()
+
+    def _decode_11000nnn(self):
+        """ 11000nnn (other than 0xc6 and 0xc7, matched earlier in ring):
+            pop wR10 .. wR[10+nnn]
+        """
+        opcode = self._read_byte()
+        return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR")
+
+    def _decode_11010nnn(self):
+        """ 11010nnn: printed exactly like 10111nnn.
+        """
+        return self._decode_10111nnn()
+
+    def _decode_11xxxyyy(self):
+        """ Any remaining 11xxxyyy opcode: spare
+        """
+        return self._spare()
+
+    def _spare(self):
+        """ Consume one byte and report it as 'spare'.
+        """
+        self._index += 1
+        return 'spare'
+
+    _DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler')
+
+    # Dispatch table scanned in order by _decode(): the first entry with
+    # (opcode & mask) == value supplies the handler, so exact opcodes must
+    # stay ahead of the wider patterns that also cover them.
+    ring = (
+        _DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx),
+        _DECODE_RECIPE_TYPE(mask=0xc0, value=0x40, handler=_decode_01xxxxxx),
+        _DECODE_RECIPE_TYPE(mask=0xf0, value=0x80, handler=_decode_1000iiii_iiiiiiii),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0x9d, handler=_decode_10011101),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0x9f, handler=_decode_10011111),
+        _DECODE_RECIPE_TYPE(mask=0xf0, value=0x90, handler=_decode_1001nnnn),
+        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa0, handler=_decode_10100nnn),
+        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa8, handler=_decode_10101nnn),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb0, handler=_decode_10110000),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb1, handler=_decode_10110001_0000iiii),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb2, handler=_decode_10110010_uleb128),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb3, handler=_decode_10110011_sssscccc),
+        _DECODE_RECIPE_TYPE(mask=0xfc, value=0xb4, handler=_decode_101101nn),
+        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xb8, handler=_decode_10111nnn),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc6, handler=_decode_11000110_sssscccc),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc7, handler=_decode_11000111_0000iiii),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc8, handler=_decode_11001000_sssscccc),
+        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc9, handler=_decode_11001001_sssscccc),
+        _DECODE_RECIPE_TYPE(mask=0xc8, value=0xc8, handler=_decode_11001yyy),
+        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xc0, handler=_decode_11000nnn),
+        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xd0, handler=_decode_11010nnn),
+        _DECODE_RECIPE_TYPE(mask=0xc0, value=0xc0, handler=_decode_11xxxyyy),
+    )
+
+
+class MnemonicItem(object):
+    """ One decoded instruction: raw bytes plus the mnemonic text. """
+
+    def __init__(self, bytecode, mnemonic):
+        self.mnemonic = mnemonic
+        self.bytecode = bytecode
+
+    def __repr__(self):
+        hex_bytes = ' '.join('0x%02x' % byte for byte in self.bytecode)
+        return '%s ; %s' % (hex_bytes, self.mnemonic)
diff --git a/elftools/ehabi/ehabiinfo.py b/elftools/ehabi/ehabiinfo.py
new file mode 100644 (file)
index 0000000..415566c
--- /dev/null
@@ -0,0 +1,209 @@
+# -------------------------------------------------------------------------------
+# elftools: ehabi/ehabiinfo.py
+#
+# Parse ARM exception handler index entries (.ARM.exidx / .ARM.extab).
+#
+# LeadroyaL (leadroyal@qq.com)
+# This code is in the public domain
+# -------------------------------------------------------------------------------
+
+from ..common.utils import struct_parse
+
+from .decoder import EHABIBytecodeDecoder
+from .constants import EHABI_INDEX_ENTRY_SIZE
+from .structs import EHABIStructs
+
+
+class EHABIInfo(object):
+    """ ARM exception handler abi information class.
+
+        Parameters:
+
+            arm_idx_section:
+                elf.sections.Section object, section which type is SHT_ARM_EXIDX.
+
+            little_endian:
+                bool, endianness of elf file.
+    """
+
+    def __init__(self, arm_idx_section, little_endian):
+        self._arm_idx_section = arm_idx_section
+        self._struct = EHABIStructs(little_endian)
+        self._num_entry = None
+
+    def section_name(self):
+        """ Name of the index section.
+        """
+        return self._arm_idx_section.name
+
+    def section_offset(self):
+        """ sh_offset of the index section.
+        """
+        return self._arm_idx_section['sh_offset']
+
+    def num_entry(self):
+        """ Number of exception handler entry in the section.
+        """
+        if self._num_entry is None:
+            self._num_entry = self._arm_idx_section['sh_size'] // EHABI_INDEX_ENTRY_SIZE
+        return self._num_entry
+
+    def get_entry(self, n):
+        """ Get the exception handler entry at index #n. (EHABIEntry object or a subclass)
+
+            Raises IndexError unless 0 <= n < num_entry(). An entry that fails
+            one of the format checks below is returned as a CorruptEHABIEntry.
+        """
+        # A negative index would address data located before the section.
+        if n < 0 or n >= self.num_entry():
+            raise IndexError('Invalid entry %d/%d' % (n, self._num_entry))
+        stream = self._arm_idx_section.stream
+        # Each prel31 word is expanded relative to the offset it is stored at.
+        word0_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE
+        word1_offset = word0_offset + 4
+        eh_index_data = struct_parse(self._struct.EH_index_struct, stream, word0_offset)
+        word0, word1 = eh_index_data['word0'], eh_index_data['word1']
+
+        if (word0 & 0x80000000) != 0:
+            return CorruptEHABIEntry('Corrupt ARM exception handler table entry: %x' % n)
+
+        function_offset = arm_expand_prel31(word0, word0_offset)
+
+        if word1 == 1:
+            # 0x1 means cannot unwind
+            return CannotUnwindEHABIEntry(function_offset)
+
+        if (word1 & 0x80000000) != 0:
+            # highest bit is one: bytecode is inlined in the index entry,
+            # compact model must be 0
+            if (word1 & 0x7f000000) != 0:
+                return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
+            opcode = [(word1 & 0xFF0000) >> 16, (word1 & 0xFF00) >> 8, word1 & 0xFF]
+            return EHABIEntry(function_offset, 0, opcode)
+
+        # highest bit is zero, point to .ARM.extab data
+        eh_table_offset = arm_expand_prel31(word1, word1_offset)
+        table_word = struct_parse(self._struct.EH_table_struct, stream, eh_table_offset)['word0']
+
+        if (table_word & 0x80000000) == 0:
+            # highest bit is zero, generic model
+            return GenericEHABIEntry(function_offset, arm_expand_prel31(table_word, eh_table_offset))
+
+        # highest bit is one, arm compact model
+        # highest nibble must be 0b1000 for compact model
+        if (table_word & 0x70000000) != 0:
+            return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
+        per_index = (table_word >> 24) & 0x7f
+
+        if per_index == 0:
+            # arm compact model 0: three bytecode bytes in the first word.
+            # The entry lives in .ARM.extab, so its table offset is recorded
+            # just like for models 1/2.
+            opcode = [(table_word & 0xFF0000) >> 16, (table_word & 0xFF00) >> 8, table_word & 0xFF]
+            return EHABIEntry(function_offset, per_index, opcode, eh_table_offset=eh_table_offset)
+
+        if per_index == 1 or per_index == 2:
+            # arm compact model 1/2: a count of additional words, then two
+            # bytecode bytes; the additional words follow, four bytes each.
+            more_word = (table_word >> 16) & 0xff
+            opcode = [(table_word >> 8) & 0xff, table_word & 0xff]
+            stream.seek(eh_table_offset + 4)
+            for _ in range(more_word):
+                r = struct_parse(self._struct.EH_table_struct, stream)['word0']
+                opcode.extend([(r >> 24) & 0xFF, (r >> 16) & 0xFF, (r >> 8) & 0xFF, r & 0xFF])
+            return EHABIEntry(function_offset, per_index, opcode, eh_table_offset=eh_table_offset)
+
+        return CorruptEHABIEntry('Unknown ARM compact model %d at table entry: %x' % (per_index, n))
+
+
+class EHABIEntry(object):
+    """ Exception handler abi entry.
+
+        Accessible attributes:
+
+            function_offset:
+                Integer.
+                None if corrupt. (Reference: CorruptEHABIEntry)
+
+            personality:
+                Integer.
+                None if corrupt or not unwindable. (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry)
+                0/1/2 for ARM personality compact format.
+                Others for generic personality.
+
+            bytecode_array:
+                Integer array.
+                None if corrupt or not unwindable or generic personality.
+                (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry)
+
+            eh_table_offset:
+                Integer.
+                Only entries who point to .ARM.extab contains this field, otherwise return None.
+
+            unwindable:
+                bool. Whether this function is unwindable.
+
+            corrupt:
+                bool. Whether this entry is corrupt.
+
+    """
+
+    def __init__(self,
+                 function_offset,
+                 personality,
+                 bytecode_array,
+                 eh_table_offset=None,
+                 unwindable=True,
+                 corrupt=False):
+        self.function_offset = function_offset
+        self.personality = personality
+        self.bytecode_array = bytecode_array
+        self.eh_table_offset = eh_table_offset
+        self.unwindable = unwindable
+        self.corrupt = corrupt
+
+    def mnemonic_array(self):
+        """ Decode bytecode_array with EHABIBytecodeDecoder and return its
+            mnemonic_array; return None if bytecode_array is None or empty.
+        """
+        if self.bytecode_array:
+            return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array
+        return None
+
+    def mnmemonic_array(self):
+        """ Misspelled original name of mnemonic_array(), kept so that
+            existing callers keep working.
+        """
+        return self.mnemonic_array()
+
+    def __repr__(self):
+        return "<EHABIEntry function_offset=0x%x, personality=%d, %sbytecode=%s>" % (
+            self.function_offset,
+            self.personality,
+            "eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "",
+            self.bytecode_array)
+
+
+class CorruptEHABIEntry(EHABIEntry):
+    """ Entry that failed a format check: #corrupt is True and #reason holds
+        the explanation; function_offset, personality and bytecode_array are None.
+    """
+
+    def __init__(self, reason):
+        super(CorruptEHABIEntry, self).__init__(None, None, None, corrupt=True)
+        self.reason = reason
+
+    def __repr__(self):
+        return "<CorruptEHABIEntry reason=%s>" % self.reason
+
+
+class CannotUnwindEHABIEntry(EHABIEntry):
+    """ Entry of a function that cannot be unwound: #unwindable is False,
+        personality and bytecode_array are None. """
+
+    def __init__(self, function_offset):
+        super(CannotUnwindEHABIEntry, self).__init__(
+            function_offset, None, None, unwindable=False)
+
+    def __repr__(self):
+        return "<CannotUnwindEHABIEntry function_offset=0x%x>" % self.function_offset
+
+
+class GenericEHABIEntry(EHABIEntry):
+    """ Entry using the generic model rather than an ARM compact model:
+        personality is supplied by the caller, bytecode_array is None. """
+
+    def __init__(self, function_offset, personality):
+        super(GenericEHABIEntry, self).__init__(function_offset, personality, None)
+
+    def __repr__(self):
+        return "<GenericEHABIEntry function_offset=0x%x, personality=0x%x>" % (self.function_offset, self.personality)
+
+
+def arm_expand_prel31(address, place):
+    """ Sign-extend the low 31 bits of address (sign bit is bit 30), add place.
+       address: uint32
+       place: uint32
+       return: uint64 (the sum wrapped to 64 bits)
+    """
+    location = address & 0x7fffffff
+    if location & 0x40000000:
+        location |= 0xffffffff80000000
+    return (location + place) & 0xffffffffffffffff
diff --git a/elftools/ehabi/structs.py b/elftools/ehabi/structs.py
new file mode 100644 (file)
index 0000000..35ceaf3
--- /dev/null
@@ -0,0 +1,47 @@
+# -------------------------------------------------------------------------------
+# elftools: ehabi/structs.py
+#
+# Encapsulation of Construct structs for parsing an EHABI, adjusted for
+# correct endianness and word-size.
+#
+# LeadroyaL (leadroyal@qq.com)
+# This code is in the public domain
+# -------------------------------------------------------------------------------
+
+from ..construct import UBInt32, ULInt32, Struct
+
+
+class EHABIStructs(object):
+    """ Construct structs for EHABI data, built for one endianness.
+
+        Accessible attributes:
+
+            EH_index_struct:
+                Struct of item in section .ARM.exidx.
+
+            EH_table_struct:
+                Struct of item in section .ARM.extab.
+    """
+
+    def __init__(self, little_endian):
+        self._little_endian = little_endian
+        self._create_structs()
+
+    def _create_structs(self):
+        """ Select the 32-bit integer parser, then build both structs from it.
+        """
+        self.EHABI_uint32 = ULInt32 if self._little_endian else UBInt32
+        self._create_exception_handler_index()
+        self._create_exception_handler_table()
+
+    def _create_exception_handler_index(self):
+        """ Index item: two consecutive 32-bit words.
+        """
+        uint32 = self.EHABI_uint32
+        self.EH_index_struct = Struct('EH_index', uint32('word0'), uint32('word1'))
+
+    def _create_exception_handler_table(self):
+        """ Table item: a single 32-bit word.
+        """
+        uint32 = self.EHABI_uint32
+        self.EH_table_struct = Struct('EH_table', uint32('word0'))
index 3c8ed519a9868292667043e46652c4fcb63756e5..5020f4c6b0fafabb3d8b07cb062a656d7daa3d09 100644 (file)
@@ -37,6 +37,7 @@ from .gnuversions import (
         GNUVerSymSection)
 from .segments import Segment, InterpSegment, NoteSegment
 from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
+from ..ehabi.ehabiinfo import EHABIInfo
 from .hash import ELFHashSection, GNUHashSection
 
 class ELFFile(object):
@@ -227,6 +228,25 @@ class ELFFile(object):
                 debug_pubnames_sec = debug_sections[debug_pubnames_name]
                 )
 
+    def has_ehabi_info(self):
+        """ Return True if at least one section of this file has type SHT_ARM_EXIDX.
+        """
+        return 'SHT_ARM_EXIDX' in (section['sh_type'] for section in self.iter_sections())
+
+    def get_ehabi_infos(self):
+        """ Return a list with one EHABIInfo per SHT_ARM_EXIDX section, or
+            None if the file has none. Shared libraries and executables
+            generally contain one such section, object files contain many.
+            Raises AssertionError for a relocatable (ET_REL) file.
+        """
+        if self['e_type'] == 'ET_REL':
+            # TODO: support relocatable file
+            # Raised explicitly: an assert statement is skipped under -O.
+            raise AssertionError("Current version of pyelftools doesn't support relocatable file.")
+        _ret = [EHABIInfo(section, self.little_endian) for section in self.iter_sections()
+                if section['sh_type'] == 'SHT_ARM_EXIDX']
+        return _ret if len(_ret) > 0 else None
+
     def get_machine_arch(self):
         """ Return the machine architecture, as detected from the ELF header.
         """
index 366c50e832656c58ff1048fa2ddc9d1589ec9c4a..6d358908d47e844b7eeda2924f244055a7a828f2 100755 (executable)
@@ -61,6 +61,7 @@ from elftools.dwarf.constants import (
     DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
 from elftools.dwarf.locationlists import LocationParser, LocationEntry
 from elftools.dwarf.callframe import CIE, FDE, ZERO
+from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 
 
 class ReadElf(object):
@@ -563,6 +564,41 @@ class ReadElf(object):
         if not has_relocation_sections:
             self._emitline('\nThere are no relocations in this file.')
 
+    def display_arm_unwind(self):
+        """ Display the ARM exception unwind tables contained in the file
+        """
+        if not self.elffile.has_ehabi_info():
+            self._emitline('There are no .ARM.idx sections in this file.')
+            return
+        for info in self.elffile.get_ehabi_infos():
+            # e.g. Unwind section '.ARM.exidx' at offset 0x203e8 contains 1009 entries
+            header = (info.section_name(), info.section_offset(), info.num_entry())
+            self._emitline("\nUnwind section '%s' at offset 0x%x contains %d entries" % header)
+
+            for index in range(info.num_entry()):
+                entry = info.get_entry(index)
+                self._emitline()
+                self._emitline("Entry %d:" % index)
+                # A corrupt entry has no function offset to show, only a reason.
+                if isinstance(entry, CorruptEHABIEntry):
+                    self._emitline("    [corrupt] %s" % entry.reason)
+                    continue
+                self._emit("    Function offset 0x%x: " % entry.function_offset)
+                if isinstance(entry, CannotUnwindEHABIEntry):
+                    self._emitline("[cantunwind]")
+                    continue
+                if entry.eh_table_offset:
+                    self._emitline("@0x%x" % entry.eh_table_offset)
+                else:
+                    self._emitline("Compact (inline)")
+                if isinstance(entry, GenericEHABIEntry):
+                    self._emitline("    Personality: 0x%x" % entry.personality)
+                    continue
+                self._emitline("    Compact model index: %d" % entry.personality)
+                for item in entry.mnmemonic_array():
+                    self._emit('    ')
+                    self._emitline(item)
+
     def display_version_info(self):
         """ Display the version info contained in the file
         """
@@ -1470,6 +1506,9 @@ def main(stream=None):
     argparser.add_argument('-r', '--relocs',
             action='store_true', dest='show_relocs',
             help='Display the relocations (if present)')
+    argparser.add_argument('-au', '--arm-unwind',
+            action='store_true', dest='show_arm_unwind',
+            help='Display the armeabi unwind information (if present)')
     argparser.add_argument('-x', '--hex-dump',
             action='store', dest='show_hex_dump', metavar='<number|name>',
             help='Dump the contents of section <number|name> as bytes')
@@ -1524,6 +1563,8 @@ def main(stream=None):
                 readelf.display_notes()
             if args.show_relocs:
                 readelf.display_relocations()
+            if args.show_arm_unwind:
+                readelf.display_arm_unwind()
             if args.show_version_info:
                 readelf.display_version_info()
             if args.show_arch_specific:
index 33ebf805aeffe8392301ce339c4ded6a9f4b8b59..0377a1e19c372ac9fbf3dae9abd41fa0b57d3614 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,7 @@ setup(
         'elftools.elf',
         'elftools.common',
         'elftools.dwarf',
+        'elftools.ehabi',
         'elftools.construct', 'elftools.construct.lib',
         ],
 
diff --git a/test/test_ehabi_decoder.py b/test/test_ehabi_decoder.py
new file mode 100644 (file)
index 0000000..61ad8b4
--- /dev/null
@@ -0,0 +1,95 @@
+# -------------------------------------------------------------------------------
+# elftools: tests
+#
+# LeadroyaL (leadroyal@qq.com)
+# This code is in the public domain
+# -------------------------------------------------------------------------------
+
+import unittest
+
+from elftools.ehabi.decoder import EHABIBytecodeDecoder
+
+
+class TestEHABIDecoder(unittest.TestCase):
+    """ Tests for the EHABI decoder.
+    """
+
+    def testLLVM(self):
+        # Reference: https://github.com/llvm/llvm-project/blob/master/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s
+        mnemonic_array = EHABIBytecodeDecoder([0xb1, 0x0f, 0xa7, 0x3f, 0xb0, 0xb0]).mnemonic_array
+        self.assertEqual(mnemonic_array[0].mnemonic, "pop {r0, r1, r2, r3}")
+        self.assertEqual(mnemonic_array[1].mnemonic, "pop {r4, r5, r6, r7, r8, r9, r10, fp}")
+        self.assertEqual(mnemonic_array[2].mnemonic, "vsp = vsp + 256")
+        self.assertEqual(mnemonic_array[3].mnemonic, "finish")
+        self.assertEqual(mnemonic_array[4].mnemonic, "finish")
+
+        mnemonic_array = EHABIBytecodeDecoder([0xc9, 0x84, 0xb0]).mnemonic_array
+        self.assertEqual(mnemonic_array[0].mnemonic, "pop {d8, d9, d10, d11, d12}")
+        self.assertEqual(mnemonic_array[1].mnemonic, "finish")
+
+        mnemonic_array = EHABIBytecodeDecoder(
+            [0xD7, 0xC9, 0x02, 0xC8, 0x02, 0xC7, 0x03, 0xC6,
+             0x02, 0xC2, 0xBA, 0xB3, 0x12, 0xB2, 0x80, 0x04,
+             0xB1, 0x01, 0xB0, 0xA9, 0xA1, 0x91, 0x84, 0xC0,
+             0x80, 0xC0, 0x80, 0x01, 0x81, 0x00, 0x80, 0x00,
+             0x42, 0x02, ]).mnemonic_array
+        self.assertEqual(mnemonic_array[0].mnemonic, "pop {d8, d9, d10, d11, d12, d13, d14, d15}")
+        self.assertEqual(mnemonic_array[1].mnemonic, "pop {d0, d1, d2}")
+        self.assertEqual(mnemonic_array[2].mnemonic, "pop {d16, d17, d18}")
+        self.assertEqual(mnemonic_array[3].mnemonic, "pop {wCGR0, wCGR1}")
+        self.assertEqual(mnemonic_array[4].mnemonic, "pop {wR0, wR1, wR2}")
+        self.assertEqual(mnemonic_array[5].mnemonic, "pop {wR10, wR11, wR12}")
+        self.assertEqual(mnemonic_array[6].mnemonic, "pop {d8, d9, d10}")
+        self.assertEqual(mnemonic_array[7].mnemonic, "pop {d1, d2, d3}")
+        self.assertEqual(mnemonic_array[8].mnemonic, "vsp = vsp + 2564")
+        self.assertEqual(mnemonic_array[9].mnemonic, "pop {r0}")
+        self.assertEqual(mnemonic_array[10].mnemonic, "finish")
+        self.assertEqual(mnemonic_array[11].mnemonic, "pop {r4, r5, lr}")
+        self.assertEqual(mnemonic_array[12].mnemonic, "pop {r4, r5}")
+        self.assertEqual(mnemonic_array[13].mnemonic, "vsp = r1")
+        self.assertEqual(mnemonic_array[14].mnemonic, "pop {r10, fp, lr}")
+        self.assertEqual(mnemonic_array[15].mnemonic, "pop {r10, fp}")
+        self.assertEqual(mnemonic_array[16].mnemonic, "pop {r4}")
+        self.assertEqual(mnemonic_array[17].mnemonic, "pop {ip}")
+        self.assertEqual(mnemonic_array[18].mnemonic, "refuse to unwind")
+        self.assertEqual(mnemonic_array[19].mnemonic, "vsp = vsp - 12")
+        self.assertEqual(mnemonic_array[20].mnemonic, "vsp = vsp + 12")
+
+        mnemonic_array = EHABIBytecodeDecoder(
+            [0xD8, 0xD0, 0xCA, 0xC9, 0x00, 0xC8, 0x00, 0xC7,
+             0x10, 0xC7, 0x01, 0xC7, 0x00, 0xC6, 0x00, 0xC0,
+             0xB8, 0xB4, 0xB3, 0x00, 0xB2, 0x00, 0xB1, 0x10,
+             0xB1, 0x01, 0xB1, 0x00, 0xB0, 0xA8, 0xA0, 0x9F,
+             0x9D, 0x91, 0x88, 0x00, 0x80, 0x00, 0x40, 0x00,
+             ]).mnemonic_array
+        self.assertEqual(mnemonic_array[0].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[1].mnemonic, "pop {d8}")
+        self.assertEqual(mnemonic_array[2].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[3].mnemonic, "pop {d0}")
+        self.assertEqual(mnemonic_array[4].mnemonic, "pop {d16}")
+        self.assertEqual(mnemonic_array[5].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[6].mnemonic, "pop {wCGR0}")
+        self.assertEqual(mnemonic_array[7].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[8].mnemonic, "pop {wR0}")
+        self.assertEqual(mnemonic_array[9].mnemonic, "pop {wR10}")
+        self.assertEqual(mnemonic_array[10].mnemonic, "pop {d8}")
+        self.assertEqual(mnemonic_array[11].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[12].mnemonic, "pop {d0}")
+        self.assertEqual(mnemonic_array[13].mnemonic, "vsp = vsp + 516")
+        self.assertEqual(mnemonic_array[14].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[15].mnemonic, "pop {r0}")
+        self.assertEqual(mnemonic_array[16].mnemonic, "spare")
+        self.assertEqual(mnemonic_array[17].mnemonic, "finish")
+        self.assertEqual(mnemonic_array[18].mnemonic, "pop {r4, lr}")
+        self.assertEqual(mnemonic_array[19].mnemonic, "pop {r4}")
+        self.assertEqual(mnemonic_array[20].mnemonic, "reserved (WiMMX MOVrr)")
+        self.assertEqual(mnemonic_array[21].mnemonic, "reserved (ARM MOVrr)")
+        self.assertEqual(mnemonic_array[22].mnemonic, "vsp = r1")
+        self.assertEqual(mnemonic_array[23].mnemonic, "pop {pc}")
+        self.assertEqual(mnemonic_array[24].mnemonic, "refuse to unwind")
+        self.assertEqual(mnemonic_array[25].mnemonic, "vsp = vsp - 4")
+        self.assertEqual(mnemonic_array[26].mnemonic, "vsp = vsp + 4")
+
+
+if __name__ == '__main__':  # true only when this file is executed as a script
+    unittest.main()  # run the test cases defined in this module
diff --git a/test/test_ehabi_elf.py b/test/test_ehabi_elf.py
new file mode 100644 (file)
index 0000000..9a0c12b
--- /dev/null
@@ -0,0 +1,89 @@
+# -------------------------------------------------------------------------------
+# elftools: tests
+#
+# LeadroyaL (leadroyal@qq.com)
+# This code is in the public domain
+# -------------------------------------------------------------------------------
+
+import unittest
+import os
+
+from elftools.ehabi.ehabiinfo import EHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry, CorruptEHABIEntry
+from elftools.elf.elffile import ELFFile
+
+
+class TestEHABIELF(unittest.TestCase):
+    """ Parse ELF and visit ARM exception handler index table entry.
+    """
+
+    def test_parse_object_file(self):
+        # FIXME: `.ARM.exidx.text.XXX` need relocation, it's too complex for current unittest.
+        fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.o')
+        with open(fname, 'rb') as f:
+            elf = ELFFile(f)
+            try:
+                elf.get_ehabi_infos()
+                self.assertTrue(False, "Unreachable code")
+            except AssertionError as e:
+                self.assertEqual(str(e), "Current version of pyelftools doesn't support relocatable file.")
+
+    def test_parse_shared_library(self):
+        fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.so')
+        with open(fname, 'rb') as f:
+            elf = ELFFile(f)
+            self.assertTrue(elf.has_ehabi_info())
+            infos = elf.get_ehabi_infos()
+            self.assertEqual(1, len(infos))
+            info = infos[0]
+
+            self.assertIsInstance(info.get_entry(0), EHABIEntry)
+            self.assertEqual(info.get_entry(0).function_offset, 0x34610)
+            self.assertEqual(info.get_entry(0).eh_table_offset, 0x69544)
+            self.assertEqual(info.get_entry(0).bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0])
+
+            self.assertIsInstance(info.get_entry(7), CannotUnwindEHABIEntry)
+            self.assertEqual(info.get_entry(7).function_offset, 0x346f8)
+
+            self.assertIsInstance(info.get_entry(8), EHABIEntry)
+            self.assertEqual(info.get_entry(8).personality, 0)
+            self.assertEqual(info.get_entry(8).function_offset, 0x3473c)
+            self.assertEqual(info.get_entry(8).bytecode_array, [0x97, 0x84, 0x08])
+
+            self.assertIsInstance(info.get_entry(9), GenericEHABIEntry)
+            self.assertEqual(info.get_entry(9).function_offset, 0x3477c)
+            self.assertEqual(info.get_entry(9).personality, 0x31a30)
+
+            for i in range(info.num_entry()):
+                self.assertNotIsInstance(info.get_entry(i), CorruptEHABIEntry)
+
+    def test_parse_executable(self):
+        fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.elf')
+        with open(fname, 'rb') as f:
+            elf = ELFFile(f)
+            self.assertTrue(elf.has_ehabi_info())
+            infos = elf.get_ehabi_infos()
+            self.assertEqual(1, len(infos))
+            info = infos[0]
+
+            self.assertIsInstance(info.get_entry(0), EHABIEntry)
+            self.assertEqual(info.get_entry(0).function_offset, 0x4f50)
+            self.assertEqual(info.get_entry(0).eh_table_offset, 0x22864)
+            self.assertEqual(info.get_entry(0).bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0])
+
+            self.assertIsInstance(info.get_entry(7), CannotUnwindEHABIEntry)
+            self.assertEqual(info.get_entry(7).function_offset, 0x5040)
+
+            self.assertIsInstance(info.get_entry(8), GenericEHABIEntry)
+            self.assertEqual(info.get_entry(8).personality, 0x15d21)
+
+            self.assertIsInstance(info.get_entry(9), EHABIEntry)
+            self.assertEqual(info.get_entry(9).function_offset, 0x5144)
+            self.assertEqual(info.get_entry(9).personality, 0)
+            self.assertEqual(info.get_entry(9).bytecode_array, [0x97, 0x84, 0x08])
+
+            for i in range(info.num_entry()):
+                self.assertNotIsInstance(info.get_entry(i), CorruptEHABIEntry)
+
+
+if __name__ == '__main__':  # true only when this file is executed as a script
+    unittest.main()  # run the test cases defined in this module
diff --git a/test/testfiles_for_unittests/arm_exidx_test.cpp b/test/testfiles_for_unittests/arm_exidx_test.cpp
new file mode 100644 (file)
index 0000000..4790cc0
--- /dev/null
@@ -0,0 +1,23 @@
+#include <string>
+#include <iostream>
+
+void func1(int i);
+
+void func2(int i);
+
+void func1(int i) {  // one half of the func1/func2 mutual recursion
+    if (i == 0)
+        return;  // base case: stop when the counter is 0
+    func2(i - 1);  // otherwise hand off to func2 with a decremented counter
+}
+
+void func2(int i) {  // other half of the func1/func2 mutual recursion
+    if (i == 0)
+        return;  // base case: stop when the counter is 0
+    func1(i - 1);  // otherwise hand back to func1 with a decremented counter
+}
+
+int main(int argc, char **argv) {  // argc/argv are unused; there is no explicit return statement
+    std::string hello = "Hello from C++";
+    std::cout << hello << std::endl;  // write the greeting plus newline; std::endl also flushes
+}
diff --git a/test/testfiles_for_unittests/arm_exidx_test.elf b/test/testfiles_for_unittests/arm_exidx_test.elf
new file mode 100644 (file)
index 0000000..94bb535
Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.elf differ
diff --git a/test/testfiles_for_unittests/arm_exidx_test.o b/test/testfiles_for_unittests/arm_exidx_test.o
new file mode 100644 (file)
index 0000000..c13b003
Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.o differ
diff --git a/test/testfiles_for_unittests/arm_exidx_test.so b/test/testfiles_for_unittests/arm_exidx_test.so
new file mode 100755 (executable)
index 0000000..ef45313
Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.so differ