Filename in lineprog index fix made DWARF5 aware (#463)
[pyelftools.git] / examples / dwarf_decode_address.py
1 #-------------------------------------------------------------------------------
2 # elftools example: dwarf_decode_address.py
3 #
4 # Decode an address in an ELF file to find out which function it belongs to
5 # and from which filename/line it comes in the original source file.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from __future__ import print_function
11 import sys
12
13 # If pyelftools is not installed, the example can also run from the root or
14 # examples/ dir of the source distribution.
15 sys.path[0:0] = ['.', '..']
16
17 from elftools.common.utils import bytes2str
18 from elftools.dwarf.descriptions import describe_form_class
19 from elftools.elf.elffile import ELFFile
20
21
def process_file(filename, address):
    """Report which function and source location `address` belongs to.

    Opens the ELF file `filename`, and if it carries DWARF info, prints the
    name of the function containing `address` together with the source file
    and line the address maps to. If the file has no DWARF info a short
    notice is printed instead. Nothing is returned.

    filename:
        Path of the ELF file to inspect.
    address:
        Integer address to look up.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        funcname = decode_funcname(dwarfinfo, address)
        srcfile, line = decode_file_line(dwarfinfo, address)

        # Both lookups return None when the address is not covered by the
        # debug info. Only hand real values to bytes2str, and print a
        # placeholder otherwise, so an unknown address is reported instead
        # of crashing the example.
        unknown = '<unknown>'
        print('Function:', unknown if funcname is None else bytes2str(funcname))
        print('File:', unknown if srcfile is None else bytes2str(srcfile))
        print('Line:', unknown if line is None else line)
41
42
def decode_funcname(dwarfinfo, address):
    """Return the name of the subprogram whose PC range contains `address`.

    Every DIE of every CU is visited; the first DW_TAG_subprogram entry with
    low_pc <= address < high_pc wins and the value of its DW_AT_name
    attribute is returned. Subprograms with split address ranges are not
    considered. Returns None when no entry matches.
    """
    for cu in dwarfinfo.iter_CUs():
        for die in cu.iter_DIEs():
            try:
                if die.tag != 'DW_TAG_subprogram':
                    continue

                attrs = die.attributes
                start = attrs['DW_AT_low_pc'].value

                # Per DWARF v4 section 2.17, the meaning of DW_AT_high_pc
                # depends on the class of its form: an 'address' is an
                # absolute address (like DW_AT_low_pc), a 'constant' is an
                # offset from DW_AT_low_pc.
                high_attr = attrs['DW_AT_high_pc']
                form_class = describe_form_class(high_attr.form)
                if form_class == 'address':
                    end = high_attr.value
                elif form_class == 'constant':
                    end = start + high_attr.value
                else:
                    print('Error: invalid DW_AT_high_pc class:',
                          form_class)
                    continue

                if start <= address < end:
                    return attrs['DW_AT_name'].value
            except KeyError:
                # A required attribute is missing from this DIE; move on to
                # the next one.
                continue
    return None
75
76
def decode_file_line(dwarfinfo, address):
    """Return the (filename, line) pair that `address` maps to.

    Walks the line program of every CU looking for two consecutive states
    whose addresses bracket `address`; the file name and line of the earlier
    state are returned. CUs for which no line program is available are
    skipped. Returns (None, None) when no line program covers the address.
    """
    for CU in dwarfinfo.iter_CUs():
        lineprog = dwarfinfo.line_program_for_CU(CU)
        if lineprog is None:
            # No line program for this CU, so there is nothing to search.
            continue

        # The state's file number is used directly as an index into
        # file_entry from line-program version 5 on; earlier versions need
        # it shifted down by one.
        delta = 1 if lineprog.header.version < 5 else 0
        prevstate = None
        for entry in lineprog.get_entries():
            # We're interested in those entries where a new state is assigned
            if entry.state is None:
                continue
            # Looking for a range of addresses in two consecutive states that
            # contain the required address.
            if (prevstate is not None
                    and prevstate.address <= address < entry.state.address):
                filename = lineprog['file_entry'][prevstate.file - delta].name
                return filename, prevstate.line
            if entry.state.end_sequence:
                # For the state with `end_sequence`, `address` means the
                # address of the first byte after the target machine
                # instruction sequence and other information is meaningless.
                # Clear prevstate so it is not paired with the first state
                # of the next sequence; its address has already served as
                # the upper bound in the comparison above.
                prevstate = None
            else:
                prevstate = entry.state
    return None, None
106
107
if __name__ == '__main__':
    # Validate the argument count before touching sys.argv[1] / sys.argv[2],
    # so that running with too few arguments prints the usage text instead
    # of failing with an IndexError.
    if len(sys.argv) < 3:
        print('Expected usage: {0} <address> <executable>'.format(sys.argv[0]))
        sys.exit(1)

    # Test mode: `--test <executable>` looks up a fixed, known address.
    if sys.argv[1] == '--test':
        process_file(sys.argv[2], 0x400503)
        sys.exit(0)

    # Base 0 lets the address be given in decimal, hex (0x...), octal or
    # binary notation.
    addr = int(sys.argv[1], 0)
    process_file(sys.argv[2], addr)