2 #-------------------------------------------------------------------------------
5 # A clone of 'llvm-dwarfdump-11' in Python, based on the pyelftools library
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
11 import os
, sys
, posixpath
14 # For running from development directory. It should take precedence over the
15 # installed pyelftools.
16 sys
.path
.insert(0, '.')
18 from elftools
import __version__
19 from elftools
.common
.exceptions
import DWARFError
, ELFError
20 from elftools
.common
.py3compat
import bytes2str
21 from elftools
.elf
.elffile
import ELFFile
22 from elftools
.dwarf
.locationlists
import LocationParser
, LocationEntry
, LocationExpr
, LocationViewPair
, BaseAddressEntry
as LocBaseAddressEntry
23 from elftools
.dwarf
.ranges
import RangeEntry
# ranges.BaseAddressEntry collides with the one above
24 import elftools
.dwarf
.ranges
25 from elftools
.dwarf
.enums
import *
26 from elftools
.dwarf
.dwarf_expr
import DWARFExprParser
, DWARFExprOp
27 from elftools
.dwarf
.datatype_cpp
import describe_cpp_datatype
28 from elftools
.dwarf
.descriptions
import describe_reg_name
30 # ------------------------------
31 # ------------------------------
34 top_die
= cu
.get_top_DIE()
35 attr
= top_die
.attributes
36 if 'DW_AT_low_pc' in attr
:
37 return attr
['DW_AT_low_pc'].value
38 elif 'DW_AT_entry_pc' in attr
:
39 return attr
['DW_AT_entry_pc'].value
41 raise ValueError("Can't find the base IP (low_pc) for a CU")
43 def _addr_str_length(die
):
44 return die
.cu
.header
.address_size
*2
47 if 'DW_AT_name' in die
.attributes
:
48 return bytes2str(die
.attributes
['DW_AT_name'].value
)
49 elif 'DW_AT_linkage_name' in die
.attributes
:
50 return bytes2str(die
.attributes
['DW_AT_linkage_name'].value
)
54 def _DIE_linkage_name(die
):
55 if 'DW_AT_linkage_name' in die
.attributes
:
56 return bytes2str(die
.attributes
['DW_AT_linkage_name'].value
)
57 elif 'DW_AT_name' in die
.attributes
:
58 return bytes2str(die
.attributes
['DW_AT_name'].value
)
62 def _safe_DIE_name(die
, default
=None):
63 if 'DW_AT_name' in die
.attributes
:
64 return bytes2str(die
.attributes
['DW_AT_name'].value
)
65 elif 'DW_AT_linkage_name' in die
.attributes
:
66 return bytes2str(die
.attributes
['DW_AT_linkage_name'].value
)
70 def _safe_DIE_linkage_name(die
, default
=None):
71 if 'DW_AT_linkage_name' in die
.attributes
:
72 return bytes2str(die
.attributes
['DW_AT_linkage_name'].value
)
73 elif 'DW_AT_name' in die
.attributes
:
74 return bytes2str(die
.attributes
['DW_AT_name'].value
)
78 def _desc_ref(attr
, die
, extra
=''):
80 extra
= " \"%s\"" % extra
81 return "cu + 0x%04x => {0x%08x}%s" % (
83 die
.cu
.cu_offset
+ attr
.raw_value
,
86 def _desc_data(attr
, die
):
87 """ Hex with length driven by form
89 len = int(attr
.form
[12:]) * 2
90 return "0x%0*x" % (len, attr
.value
,)
def _strtab_describer(fmt):
    """Build a describer for a string-table form.

    fmt is applied to (attr.raw_value, attr.value as str with every
    backslash doubled).
    """
    def describe(attr, die):
        text = bytes2str(attr.value).replace("\\", "\\\\")
        return fmt % (attr.raw_value, text)
    return describe


def _hex_block_describer(fmt):
    """Build a describer for a block form.

    fmt is applied to (number of items in attr.value, the items rendered as
    space-separated two-digit hex).
    """
    def describe(attr, die):
        hex_bytes = " ".join("%02x" % b for b in attr.value)
        return fmt % (len(attr.value), hex_bytes)
    return describe


# Form name -> describer, each called as describer(attr, die) and returning
# the text to print for the attribute's value.
FORM_DESCRIPTIONS = {
    'DW_FORM_string': lambda attr, die: "\"%s\"" % (bytes2str(attr.value),),
    'DW_FORM_strp': _strtab_describer(" .debug_str[0x%08x] = \"%s\""),
    'DW_FORM_line_strp': _strtab_describer(".debug_line_str[0x%08x] = \"%s\""),
    'DW_FORM_flag_present': lambda attr, die: "true",
    'DW_FORM_flag': lambda attr, die: "0x%02x" % int(attr.value),
    'DW_FORM_addr': lambda attr, die: "0x%0*x" % (_addr_str_length(die), attr.value),
    'DW_FORM_data1': _desc_data,
    'DW_FORM_data2': _desc_data,
    'DW_FORM_data4': _desc_data,
    'DW_FORM_data8': _desc_data,
    'DW_FORM_block1': _hex_block_describer("<0x%02x> %s "),
    'DW_FORM_block2': _hex_block_describer("<0x%04x> %s "),
    'DW_FORM_block4': _hex_block_describer("<0x%08x> %s "),
    'DW_FORM_ref': _desc_ref,
    'DW_FORM_ref1': _desc_ref,
    'DW_FORM_ref2': _desc_ref,
    'DW_FORM_ref4': _desc_ref,
    'DW_FORM_ref8': _desc_ref,
    'DW_FORM_sec_offset': lambda attr, die: "0x%08x" % (attr.value,),
    # Wrapped in a lambda because _desc_expression is defined further down.
    'DW_FORM_exprloc': lambda attr, die: _desc_expression(attr.value, die),
}
113 def _desc_enum(attr
, enum
):
114 """For attributes like DW_AT_language, physically
115 int, logically an enum
117 return next((k
for (k
, v
) in enum
.items() if v
== attr
.value
), str(attr
.value
))
def _cu_comp_dir(cu):
    """Return the value of DW_AT_comp_dir from the CU's top DIE, as a str.

    There is no fallback: the lookup fails if the top DIE does not carry
    the attribute.
    """
    top_attributes = cu.get_top_DIE().attributes
    comp_dir = top_attributes['DW_AT_comp_dir']
    return bytes2str(comp_dir.value)
122 def _desc_decl_file(attr
, die
):
124 if not hasattr(cu
, "_lineprogram"):
125 cu
._lineprogram
= die
.dwarfinfo
.line_program_for_CU(cu
)
127 if cu
._lineprogram
and val
> 0 and val
<= len(cu
._lineprogram
.header
.file_entry
):
128 file_entry
= cu
._lineprogram
.header
.file_entry
[val
-1]
129 includes
= cu
._lineprogram
.header
.include_directory
130 if file_entry
.dir_index
> 0:
131 dir = bytes2str(includes
[file_entry
.dir_index
- 1])
132 if dir.startswith('.'):
133 dir = posixpath
.join(_cu_comp_dir(cu
), dir)
135 dir = _cu_comp_dir(cu
)
136 return "\"%s\"" % (posixpath
.join(dir, bytes2str(file_entry
.name
)),)
def _desc_ranges(attr, die):
    """Describe a DW_AT_ranges attribute.

    The result is the range list offset (attr.value) in hex, followed by one
    "[begin, end)" line per range entry. Entries that are not absolute are
    biased by the current base address, which starts as the CU base and is
    replaced whenever a base address entry is met.

    attr: the attribute; attr.value is the offset of the range list.
    die:  the DIE owning the attribute; its CU supplies the address size,
          the base address and the DWARFInfo object.

    Raises NotImplementedError for an entry that is neither a RangeEntry
    nor a ranges.BaseAddressEntry.
    """
    di = die.cu.dwarfinfo
    # Cache the range lists object on the DWARFInfo. The guard has to test
    # the attribute that is actually assigned; it used to test '_rnglists',
    # which is never set, so range_lists() was re-run on every call.
    if not hasattr(di, '_rangelists'):
        di._rangelists = di.range_lists()
    rangelist = di._rangelists.get_range_list_at_offset(attr.value)
    base_ip = _get_cu_base(die.cu)
    addr_str_len = die.cu.header.address_size*2
    lines = []
    for entry in rangelist:
        if isinstance(entry, RangeEntry):
            # Non-absolute entries are offsets from the current base address.
            bias = 0 if entry.is_absolute else base_ip
            lines.append(" [0x%0*x, 0x%0*x)" % (
                addr_str_len, bias + entry.begin_offset,
                addr_str_len, bias + entry.end_offset))
        elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
            base_ip = entry.base_address
        else:
            raise NotImplementedError("Unknown object in a range list")
    return ("0x%08x\n" % attr.value) + "\n".join(lines)
161 def _desc_locations(attr
, die
):
164 if not hasattr(di
, '_loclists'):
165 di
._loclists
= di
.location_lists()
166 if not hasattr(di
, '_locparser'):
167 di
._locparser
= LocationParser(di
._loclists
)
168 loclist
= di
._locparser
.parse_from_attribute(attr
, cu
.header
.version
, die
)
169 if isinstance(loclist
, LocationExpr
):
170 return _desc_expression(loclist
.loc_expr
, die
)
172 base_ip
= _get_cu_base(cu
)
174 addr_str_len
= die
.cu
.header
.address_size
*2
175 for entry
in loclist
:
176 if isinstance(entry
, LocationEntry
):
177 lines
.append(" [0x%0*x, 0x%0*x): %s" % (
179 (0 if entry
.is_absolute
else base_ip
) + entry
.begin_offset
,
181 (0 if entry
.is_absolute
else base_ip
) + entry
.end_offset
,
182 _desc_expression(entry
.loc_expr
, die
)))
183 elif isinstance(entry
, LocBaseAddressEntry
):
184 base_ip
= entry
.base_address
186 raise NotImplementedError("Unknown object in a location list")
187 return ("0x%08x:\n" % attr
.value
) + "\n".join(lines
)
189 # By default, numeric arguments are spelled in hex with a leading 0x
190 def _desc_operationarg(s
, cu
):
191 if isinstance(s
, str):
193 elif isinstance(s
, int):
195 elif isinstance(s
, list): # Could be a blob (list of ints), could be a subexpression
196 if len(s
) > 0 and isinstance(s
[0], DWARFExprOp
): # Subexpression
197 return '(' + '; '.join(_desc_operation(op
.op
, op
.op_name
, op
.args
, cu
) for op
in s
) + ')'
199 return " ".join((hex(len(s
)),) + tuple("0x%02x" % b
for b
in s
))
202 return cu
.dwarfinfo
.config
.machine_arch
def _desc_reg(reg_no, cu):
    """Return the upper-cased name of register `reg_no` for the CU's machine.

    The architecture comes from _arch(cu); the name lookup itself is
    delegated to describe_reg_name.
    """
    reg_name = describe_reg_name(reg_no, _arch(cu), True)
    return reg_name.upper()
207 def _desc_operation(op
, op_name
, args
, cu
):
208 # Not sure about regx(regno) and bregx(regno, offset)
209 if 0x50 <= op
<= 0x6f: # reg0...reg31 - decode reg name
210 return op_name
+ " " + _desc_reg(op
- 0x50, cu
)
211 elif 0x70 <= op
<= 0x8f: # breg0...breg31(offset) - also decode reg name
212 return '%s %s%+d' % (
214 _desc_reg(op
- 0x70, cu
),
216 elif op_name
in ('DW_OP_fbreg', 'DW_OP_bra', 'DW_OP_skip'): # Argument is decimal with a leading sign
217 return op_name
+ ' ' + "%+d" % (args
[0])
218 elif op_name
in ('DW_OP_const1s', 'DW_OP_const2s'): # Argument is decimal without a leading sign
219 return op_name
+ ' ' + "%d" % (args
[0])
220 elif op_name
in ('DW_OP_entry_value', 'DW_OP_GNU_entry_value'): # No space between opcode and args
221 return op_name
+ _desc_operationarg(args
[0], cu
)
222 elif op_name
== 'DW_OP_regval_type': # Arg is a DIE pointer
223 return "%s %s (0x%08x -> 0x%08x) \"%s\"" % (
225 _desc_reg(args
[0], cu
),
227 args
[1] + cu
.cu_offset
,
228 _DIE_name(cu
._get
_cached
_DIE
(args
[1] + cu
.cu_offset
)))
229 elif op_name
== 'DW_OP_convert': # Arg is a DIE pointer
230 return "%s (0x%08x -> 0x%08x) \"%s\"" % (
233 args
[0] + cu
.cu_offset
,
234 _DIE_name(cu
._get
_cached
_DIE
(args
[0] + cu
.cu_offset
)))
236 return op_name
+ ' ' + ', '.join(_desc_operationarg(s
, cu
) for s
in args
)
240 # TODO: remove this once dwarfdump catches up
242 'DW_OP_implicit_pointer',
244 'DW_OP_GNU_parameter_ref',
245 'DW_OP_GNU_deref_type',
246 'DW_OP_GNU_implicit_pointer',
248 'DW_OP_GNU_regval_type')
250 def _desc_expression(expr
, die
):
252 if not hasattr(cu
, '_exprparser'):
253 cu
._exprparser
= DWARFExprParser(cu
.structs
)
255 parsed
= cu
._exprparser
.parse_expr(expr
)
256 # TODO: remove this once dwarfdump catches up
257 first_unsupported
= next((i
for (i
, op
) in enumerate(parsed
) if op
.op_name
in UNSUPPORTED_OPS
), None)
258 if first_unsupported
is None:
259 lines
= [_desc_operation(op
.op
, op
.op_name
, op
.args
, cu
) for op
in parsed
]
261 lines
= [_desc_operation(op
.op
, op
.op_name
, op
.args
, cu
) for op
in parsed
[0:first_unsupported
]]
262 start_of_unparsed
= parsed
[first_unsupported
].offset
263 lines
.append("<decoding error> " + " ".join("%02x" % b
for b
in expr
[start_of_unparsed
:]))
264 return ", ".join(lines
)
266 def _desc_datatype(attr
, die
):
269 return _desc_ref(attr
, die
, describe_cpp_datatype(die
))
271 def _get_origin_name(die
):
272 func_die
= die
.get_DIE_from_attribute('DW_AT_abstract_origin')
273 name
= _safe_DIE_linkage_name(func_die
, '')
275 if 'DW_AT_specification' in func_die
.attributes
:
276 name
= _DIE_linkage_name(func_die
.get_DIE_from_attribute('DW_AT_specification'))
277 elif 'DW_AT_abstract_origin' in func_die
.attributes
:
278 return _get_origin_name(func_die
)
def _desc_origin(attr, die):
    """Describe a reference attribute, annotated with the origin name.

    The annotation is whatever _get_origin_name resolves for `die`; the
    reference itself is formatted by _desc_ref.
    """
    origin_name = _get_origin_name(die)
    return _desc_ref(attr, die, origin_name)
def _desc_spec(attr, die):
    """Describe a reference attribute, annotated with the specification's name.

    The DIE that `die` points to through DW_AT_specification is looked up
    and its name, as reported by _DIE_linkage_name, is passed to _desc_ref
    as the annotation.
    """
    spec_die = die.get_DIE_from_attribute('DW_AT_specification')
    spec_name = _DIE_linkage_name(spec_die)
    return _desc_ref(attr, die, spec_name)
288 def _desc_value(attr
, die
):
289 return str(attr
.value
)
def _desc_data_member_location(attr, die):
    """Describe DW_AT_data_member_location.

    Forms starting with 'DW_FORM_data' are printed as hex constants; every
    other form is handed to the location describer.
    """
    if attr.form.startswith('DW_FORM_data'):
        return _desc_data(attr, die)
    return _desc_locations(attr, die)


# Attribute name -> describer, each called as describer(attr, die) and
# returning the text to print for the attribute's value.
ATTR_DESCRIPTIONS = {
    # The enum tables are looked up at call time, hence the lambdas.
    'DW_AT_language': lambda attr, die: _desc_enum(attr, ENUM_DW_LANG),
    'DW_AT_encoding': lambda attr, die: _desc_enum(attr, ENUM_DW_ATE),
    'DW_AT_accessibility': lambda attr, die: _desc_enum(attr, ENUM_DW_ACCESS),
    'DW_AT_inline': lambda attr, die: _desc_enum(attr, ENUM_DW_INL),
    'DW_AT_decl_file': _desc_decl_file,
    'DW_AT_decl_line': _desc_value,
    'DW_AT_ranges': _desc_ranges,
    'DW_AT_location': _desc_locations,
    'DW_AT_data_member_location': _desc_data_member_location,
    'DW_AT_frame_base': _desc_locations,
    'DW_AT_type': _desc_datatype,
    'DW_AT_call_line': _desc_value,
    'DW_AT_call_file': _desc_decl_file,
    'DW_AT_abstract_origin': _desc_origin,
    'DW_AT_specification': _desc_spec,
}
309 class ReadElf(object):
310 """ dump_xxx is used to dump the respective section.
311 Mimics the output of dwarfdump with --verbose
313 def __init__(self
, filename
, file, output
):
315 stream object with the ELF file to read
318 output stream to write to
320 self
.elffile
= ELFFile(file)
322 self
._dwarfinfo
= self
.elffile
.get_dwarf_info()
323 arches
= {"EM_386": "i386", "EM_X86_64": "x86-64"}
324 arch
= arches
[self
.elffile
['e_machine']]
325 bits
= self
.elffile
.elfclass
326 self
._emitline
("%s: file format elf%d-%s" % (filename
, bits
, arch
))
328 def _emit(self
, s
=''):
329 """ Emit an object to output
331 self
.output
.write(str(s
))
333 def _emitline(self
, s
=''):
334 """ Emit an object to output, followed by a newline
336 self
.output
.write(str(s
).rstrip() + '\n')
339 # TODO: DWARF64 will cause discrepancies in hex offset sizes
340 self
._emitline
(".debug_info contents:")
341 for cu
in self
._dwarfinfo
.iter_CUs():
342 if cu
.header
.version
>= 5:
343 ut
= next(k
for (k
,v
) in ENUM_DW_UT
.items() if v
== cu
.header
.unit_type
)
344 unit_type_str
= " unit_type = %s," % ut
348 self
._emitline
("0x%08x: Compile Unit: length = 0x%08x, format = DWARF%d, version = 0x%04x,%s abbr_offset = 0x%04x, addr_size = 0x%02x (next unit at 0x%08x)" %(
350 cu
.header
.unit_length
,
351 cu
.structs
.dwarf_format
,
354 cu
.header
.debug_abbrev_offset
,
355 cu
.header
.address_size
,
356 cu
.cu_offset
+ (4 if cu
.structs
.dwarf_format
== 32 else 12) + cu
.header
.unit_length
))
358 parent
= cu
.get_top_DIE()
359 for die
in cu
.iter_DIEs():
360 if die
.get_parent() == parent
:
362 if not die
.is_null():
363 self
._emitline
("0x%08x: %s [%d] %s %s" % (
367 '*' if die
.has_children
else '',
368 '(0x%08x)' % die
.get_parent().offset
if die
.get_parent() is not None else ''))
369 for attr_name
in die
.attributes
:
370 attr
= die
.attributes
[attr_name
]
371 self
._emitline
(" %s [%s] (%s)" % (attr_name
, attr
.form
, self
.describe_attr_value(die
, attr
)))
373 self
._emitline
("0x%08x: NULL" % (die
.offset
,))
374 parent
= die
.get_parent()
def describe_attr_value(self, die, attr):
    """Describe the attribute value in a way compatible with llvm_dwarfdump.

    Somewhat duplicates the work of describe_attr_value() in descriptions.
    A describer registered in ATTR_DESCRIPTIONS under the attribute's name
    wins over one registered in FORM_DESCRIPTIONS under its form; when
    neither table has an entry, str(attr.value) is returned.
    """
    describer = ATTR_DESCRIPTIONS.get(attr.name)
    if describer is None:
        describer = FORM_DESCRIPTIONS.get(attr.form)
    if describer is None:
        return str(attr.value)
    return describer(attr, die)
391 def dump_loclists(self
):
394 def dump_ranges(self
):
397 def dump_v4_rangelist(self
, rangelist
, cu_map
):
398 cu
= cu_map
[rangelist
[0].entry_offset
]
399 addr_str_len
= cu
.header
.address_size
*2
400 base_ip
= _get_cu_base(cu
)
401 for entry
in rangelist
:
402 if isinstance(entry
, RangeEntry
):
403 self
._emitline
("[0x%0*x, 0x%0*x)" % (
405 (0 if entry
.is_absolute
else base_ip
) + entry
.begin_offset
,
407 (0 if entry
.is_absolute
else base_ip
) + entry
.end_offset
))
408 elif isinstance(entry
, elftools
.dwarf
.ranges
.BaseAddressEntry
):
409 base_ip
= entry
.base_address
411 raise NotImplementedError("Unknown object in a range list")
413 def dump_rnglists(self
):
414 self
._emitline
(".debug_rnglists contents:")
415 ranges_sec
= self
._dwarfinfo
.range_lists()
416 if ranges_sec
.version
< 5:
419 cu_map
= {die
.attributes
['DW_AT_ranges'].value
: cu
# Dict from range offset to home CU
420 for cu
in self
._dwarfinfo
.iter_CUs()
421 for die
in cu
.iter_DIEs()
422 if 'DW_AT_ranges' in die
.attributes
}
424 for cu
in ranges_sec
.iter_CUs():
425 self
._emitline
("0x%08x: range list header: length = 0x%08x, format = DWARF%d, version = 0x%04x, addr_size = 0x%02x, seg_size = 0x%02x, offset_entry_count = 0x%08x" % (
428 64 if cu
.is64
else 32,
431 cu
.segment_selector_size
,
433 self
._emitline
("ranges:")
434 if cu
.offset_count
> 0:
435 rangelists
= [ranges_sec
.get_range_list_at_offset_ex(offset
) for offset
in cu
.offsets
]
437 rangelists
= list(ranges_sec
.iter_CU_range_lists_ex(cu
))
438 # We have to parse it completely before dumping, because dwarfdump aligns columns,
439 # no way to do that without some lookahead
440 max_type_len
= max(len(entry
.entry_type
) for rangelist
in rangelists
for entry
in rangelist
)
441 for rangelist
in rangelists
:
442 self
.dump_v5_rangelist(rangelist
, cu_map
, max_type_len
)
444 def dump_v5_rangelist(self
, rangelist
, cu_map
, max_type_len
):
445 cu
= cu_map
[rangelist
[0].entry_offset
]
446 addr_str_len
= cu
.header
.address_size
*2
447 base_ip
= _get_cu_base(cu
)
448 for entry
in rangelist
:
449 type = entry
.entry_type
450 self
._emit
("0x%08x: [%s]: " % (entry
.entry_offset
, type.ljust(max_type_len
)))
451 if type == 'DW_RLE_base_address':
452 base_ip
= entry
.address
453 self
._emitline
("0x%0*x" % (addr_str_len
, base_ip
))
454 elif type == 'DW_RLE_offset_pair':
455 self
._emitline
("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % (
456 addr_str_len
, entry
.start_offset
,
457 addr_str_len
, entry
.end_offset
,
458 addr_str_len
, entry
.start_offset
+ base_ip
,
459 addr_str_len
, entry
.end_offset
+ base_ip
))
460 elif type == 'DW_RLE_start_length':
461 self
._emitline
("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % (
462 addr_str_len
, entry
.start_address
,
463 addr_str_len
, entry
.length
,
464 addr_str_len
, entry
.start_address
,
465 addr_str_len
, entry
.start_address
+ entry
.length
))
466 elif type == 'DW_RLE_start_end':
467 self
._emitline
("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % (
468 addr_str_len
, entry
.start_address
,
469 addr_str_len
, entry
.end_address
,
470 addr_str_len
, entry
.start_address
,
471 addr_str_len
, entry
.end_address
))
473 raise NotImplementedError()
475 self
._emitline
("0x%08x: [DW_RLE_end_of_list ]" % (last
.entry_offset
+ last
.entry_length
,))
477 SCRIPT_DESCRIPTION
= 'Display information about the contents of ELF format files'
478 VERSION_STRING
= '%%(prog)s: based on pyelftools %s' % __version__
480 def main(stream
=None):
481 # parse the command-line arguments and invoke ReadElf
482 argparser
= argparse
.ArgumentParser(
483 usage
='usage: %(prog)s [options] <elf-file>',
484 description
=SCRIPT_DESCRIPTION
,
487 argparser
.add_argument('file',
488 nargs
='?', default
=None,
489 help='ELF file to parse')
490 argparser
.add_argument('-H', '--help',
491 action
='store_true', dest
='help',
492 help='Display this information')
493 argparser
.add_argument('--verbose',
494 action
='store_true', dest
='verbose',
495 help=('For compatibility with dwarfdump. Non-verbose mode is not implemented.'))
498 sections
= ('info', 'loclists', 'rnglists') # 'loc', 'ranges' not implemented yet
499 for section
in sections
:
500 argparser
.add_argument('--debug-%s' % section
,
501 action
='store_true', dest
=section
,
502 help=('Display the contents of DWARF debug_%s section.' % section
))
504 args
= argparser
.parse_args()
506 if args
.help or not args
.file:
507 argparser
.print_help()
510 # A compatibility hack on top of a compatibility hack :(
511 del ENUM_DW_TAG
["DW_TAG_template_type_param"]
512 del ENUM_DW_TAG
["DW_TAG_template_value_param"]
513 ENUM_DW_TAG
['DW_TAG_template_type_parameter'] = 0x2f
514 ENUM_DW_TAG
['DW_TAG_template_value_parameter'] = 0x30
516 with
open(args
.file, 'rb') as file:
518 readelf
= ReadElf(args
.file, file, stream
or sys
.stdout
)
522 readelf
.dump_loclists()
524 readelf
.dump_rnglists()
528 # readelf.dump_ranges()
529 except ELFError
as ex
:
531 sys
.stderr
.write('ELF error: %s\n' % ex
)
532 if args
.show_traceback
:
533 traceback
.print_exc()
536 #-------------------------------------------------------------------------------
537 if __name__
== '__main__':