changed the method to obtain line programs - now per CU
author Eli Bendersky <eliben@gmail.com>
Sat, 3 Dec 2011 13:50:42 +0000 (15:50 +0200)
committer Eli Bendersky <eliben@gmail.com>
Sat, 3 Dec 2011 13:50:42 +0000 (15:50 +0200)
elftools/dwarf/dwarfinfo.py
elftools/dwarf/lineprogram.py
elftools/dwarf/structs.py
z.py

index 90ba52a197bdc4afc093f179a65905338a3cc6f0..8ebb47df6ab6a136f51002214efb97b29a3b8a6b 100644 (file)
@@ -68,10 +68,6 @@ class DWARFInfo(object):
         
         # Cache for abbrev tables: a dict keyed by offset
         self._abbrevtable_cache = {}
-
-        # A list of parsed line programs. Populated lazily when the line
-        # programs are actually requested
-        self._lineprograms = None
     
     def iter_CUs(self):
         """ Yield all the compile units (CompileUnit objects) in the debug info
@@ -80,13 +76,6 @@ class DWARFInfo(object):
             self._CUs = self._parse_CUs()
         return iter(self._CUs)
 
-    def iter_line_programs(self):
-        """ Yield all the line programs (LineProgram ojects) in the debug info
-        """
-        if self._lineprograms is None:
-            self._lineprograms = self._parse_line_programs()
-        return iter(self._lineprograms)
-
     def get_abbrev_table(self, offset):
         """ Get an AbbrevTable from the given offset in the debug_abbrev
             section.
@@ -115,6 +104,20 @@ class DWARFInfo(object):
         """
         return parse_cstring_from_stream(self.debug_str_sec.stream, offset)
     
+    def line_program_for_CU(self, CU):
+        """ Return the line program referenced by the given CU, parsed from
+            the .debug_line section, or None if the CU references none.
+        """
+        # A CU references its line program through the DW_AT_stmt_list
+        # attribute of its top DIE.
+        # NOTE(review): the attribute object is forwarded as the offset
+        # unchanged -- confirm the parser does not need an unwrapped value.
+        attributes = CU.get_top_DIE().attributes
+        if 'DW_AT_stmt_list' not in attributes:
+            return None
+        return self._parse_line_program_at_offset(
+            attributes['DW_AT_stmt_list'], CU.structs)
+        
     #------ PRIVATE ------#
     
     def _parse_CUs(self):
@@ -177,48 +180,24 @@ class DWARFInfo(object):
         """
         return 2 <= version <= 3
 
-    def _parse_line_programs(self):
-        """ Parse line programs from debug_line
+    def _parse_line_program_at_offset(self, debug_line_offset, structs):
+        """ Given an offset to the .debug_line section, parse the line program
+            starting at this offset in the section and return it.
+            structs is the DWARFStructs object used to do this parsing.
         """
-        offset = 0
-        lineprograms = []
-        while offset < self.debug_line_sec.size:
-            # Similarly to CU parsing, peek at the initial_length field of the
-            # header to figure out the DWARF format for it.
-            initial_length = struct_parse(
-                self.structs.Dwarf_uint32(''),
-                self.debug_line_sec.stream,
-                offset)
-            dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
-
-            # Prepare the structs for this line program, based on its format
-            # and the default endianness. The address_size plays no role for
-            # line programs so we just give it a default value.
-            lineprog_structs = DWARFStructs(
-                little_endian=self.little_endian,
-                dwarf_format=dwarf_format,
-                address_size=4)
-
-            # Now parse the header fully using up-to-date structs. After this,
-            # the section stream will point at the beginning of the program
-            # itself, right after the header.
-            lineprog_header = struct_parse(
-                lineprog_structs.Dwarf_lineprog_header,
-                self.debug_line_sec.stream,
-                offset)
-
-            # Calculate the offset to the next line program (see DWARF 6.2.4)
-            end_offset = (  offset + lineprog_header['unit_length'] +
-                            lineprog_structs.initial_length_field_size()))
-
-            lineprograms.append(LineProgram(
-                header=lineprog_header,
-                dwarfinfo=self,
-                structs=lineprog_structs,
-                program_start_offset=self.debug_line_sec.stream.tell()),
-                program_end_offset=end_offset)
-
-            offset = end_offset
-
-        return lineprograms
+        lineprog_header = struct_parse(
+            structs.Dwarf_lineprog_header,
+            self.debug_line_sec.stream,
+            debug_line_offset)
+
+        # Calculate the offset to the next line program (see DWARF 6.2.4)
+        end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
+                        structs.initial_length_field_size())
+
+        return LineProgram(
+            header=lineprog_header,
+            dwarfinfo=self,
+            structs=structs,
+            program_start_offset=self.debug_line_sec.stream.tell(),
+            program_end_offset=end_offset)
 
index ce92075282d842ab928f56b45567e89cfbdab052..43d8f90304fec7953d60592544990defaefc7817 100644 (file)
@@ -13,6 +13,8 @@ from .constants import *
 class LineState(object):
     """ Represents a line program state (or a "row" in the matrix
         describing debug location information for addresses).
+        The instance variables of this class are the "state machine registers"
+        described in section 6.2.2 of DWARFv3
     """
     def __init__(self, default_is_stmt):
         self.address = 0
@@ -37,7 +39,9 @@ class LineProgram(object):
                  program_start_offset, program_end_offset):
         """ 
             header:
-                The header of this line program
+                The header of this line program. Note: LineProgram may modify
+                its header by appending file entries if DW_LNE_define_file
+                instructions are encountered.
 
             dwarfinfo:
                 The DWARFInfo context object which created this one
@@ -77,6 +81,15 @@ class LineProgram(object):
         linetable = []
         state = LineState(self.header['default_is_stmt'])
 
+        def add_state_to_table():
+            # Append a snapshot of the current state (not the live object,
+            # which keeps mutating), then clear the per-row registers.
+            import copy
+            linetable.append(copy.copy(state))
+            state.basic_block = False
+            state.prologue_end = False
+            state.epilogue_begin = False
+
         offset = self.program_start_offset
         while offset < self.program_end_offset:
             opcode = struct_parse(
@@ -87,30 +100,69 @@ class LineProgram(object):
             # As an exercise in avoiding premature optimization, if...elif
             # chains are used here for standard and extended opcodes instead
             # of dispatch tables. This keeps the code much cleaner. Besides,
-            # the majority of instructions are special opcodes anyway.
-            if opcode == 0:
+            # the majority of instructions in a typical program are special
+            # opcodes anyway.
+            if opcode >= self.header['opcode_base']:
+                # Special opcode (follow the recipe in 6.2.5.1)
+                adjusted_opcode = opcode - self['opcode_base']
+                state.address += ((adjusted_opcode / self['line_range']) *
+                                  self['minimum_instruction_length'])
+                self.line += (self['line_base'] + 
+                              adjusted_opcode % self['line_range'])
+                add_state_to_table()
+            elif opcode == 0:
                 # Extended opcode: start with a zero byte, followed by
                 # instruction size and the instruction itself.
-                pass
-            elif opcode < self.header['opcode_base']:
+                inst_len = struct_parse(self.Dwarf_uleb128, self.stream)
+                ex_opcode = struct_parse(self.Dwarf_uint8, self.stream)
+
+                if ex_opcode == DW_LNE_end_sequence:
+                    state.end_sequence = True
+                    add_state_to_table(state)
+                    state = LineState() # reset state
+                elif ex_opcode == DW_LNE_set_address:
+                    operand = struct_parse(self.Dwarf_target_addr, self.stream)
+                    state.address = operand
+                elif ex_opcode == DW_LNE_define_file:
+                    operand = struct_parse(self.Dwarf_lineprog_file_entry,
+                                           self.stream)
+                    self['file_entry'].append(operand)
+            else: # 0 < opcode < opcode_base
                 # Standard opcode
                 if opcode == DW_LNS_copy:
-                    linetable.append(state)
-                    state.basic_block = False
-                    state.prologue_end = False
-                    state.epilogue_begin = False
+                    add_state_to_table()
                 elif opcode == DW_LNS_advance_pc:
                     operand = struct_parse(self.Dwarf_uleb128, self.stream)
                     state.address += (
                         operand * self.header['minimum_instruction_length'])
-                elif opcode = DW_LNS_advance_line:
+                elif opcode == DW_LNS_advance_line:
                     operand = struct_parse(self.Dwarf_sleb128, self.stream)
                     state.line += operand
-                # ZZZ! go on now...
-            else:
-                # Special opcode
-                pass
-
-    def _handle_LNS_copy(self, opcode, state, linetable):
-        pass
+                elif opcode == DW_LNS_set_file:
+                    operand = struct_parse(self.Dwarf_sleb128, self.stream)
+                    state.file = operand
+                elif opcode == DW_LNS_set_column:
+                    operand = struct_parse(self.Dwarf_uleb128, self.stream)
+                    state.column = operand
+                elif opcode == DW_LNS_negate_stmt:
+                    state.is_stmt = not state.is_stmt
+                elif opcode == DW_LNS_set_basic_block:
+                    state.basic_block = True
+                elif opcode == DW_LNS_const_add_pc:
+                    adjusted_opcode = 255 - self['opcode_base']
+                    state.address += ((adjusted_opcode / self['line_range']) *
+                                      self['minimum_instruction_length'])
+                elif opcode == DW_LNS_fixed_advance_pc:
+                    operand = struct_parse(self.Dwarf_uint16, self.stream)
+                    state.address += operand
+                elif opcode == DW_LNS_set_prologue_end:
+                    state.prologue_end = True
+                elif opcode == DW_LNS_set_epilogue_begin:
+                    state.epilogue_begin = True
+                elif opcode == DW_LNS_set_isa:
+                    operand = struct_parse(self.Dwarf_uleb128, self.stream)
+                    state.isa = operand
+                else:
+                    dwarf_assert(False, 'Invalid standard line program opcode: %s' % (
+                        opcode,))
 
index 6d71657a1b15c5814a353ae80dccf624528e9d6d..ebf34c668f9a90260a9770a238652077d0cf032b 100644 (file)
@@ -59,6 +59,9 @@ class DWARFStructs(object):
 
             Dwarf_lineprog_header (+):
                 Line program header
+
+            Dwarf_lineprog_file_entry (+):
+                A single file entry in a line program header or instruction
         
         See also the documentation of public methods.
     """
@@ -188,7 +191,7 @@ class DWARFStructs(object):
     def _create_lineprog_header(self):
         # A file entry is terminated by a NULL byte, so we don't want to parse
         # past it. Therefore an If is used.
-        file_entry = Struct('file_entry',
+        self.Dwarf_lineprog_file_entry = Struct('file_entry',
             CString('name'),
             If(lambda ctx: len(ctx.name) != 0,
                 Embed(Struct('',
@@ -212,7 +215,7 @@ class DWARFStructs(object):
                 CString('include_directory')),
             RepeatUntilExcluding(
                 lambda obj, ctx: len(obj.name) == 0,
-                file_entry),
+                self.Dwarf_lineprog_file_entry),
             )
         
     def _make_block_struct(self, length_field):
diff --git a/z.py b/z.py
index 4bf370970f5490757a51ea411a1b93256c471192..f60394881669285a4782eb4c0a44a26784d7979e 100644 (file)
--- a/z.py
+++ b/z.py
@@ -23,7 +23,7 @@ print '===> %s sections!' % efile.num_sections()
 
 dwarfinfo = efile.get_dwarf_info()
 
-for lp in dwarfinfo.iter_line_programs():
-    print lp
-    print lp.header
+#for lp in dwarfinfo.iter_line_programs():
+    #print lp
+    #print lp.header