1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/dwarfinfo.py
4 # DWARFInfo - Main class for accessing DWARF debug information
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
10 from collections
import namedtuple
11 from bisect
import bisect_right
13 from ..construct
.lib
.container
import Container
14 from ..common
.exceptions
import DWARFError
15 from ..common
.utils
import (struct_parse
, dwarf_assert
,
16 parse_cstring_from_stream
)
17 from .structs
import DWARFStructs
18 from .compileunit
import CompileUnit
19 from .abbrevtable
import AbbrevTable
20 from .lineprogram
import LineProgram
21 from .callframe
import CallFrameInfo
22 from .locationlists
import LocationLists
, LocationListsPair
23 from .ranges
import RangeLists
, RangeListsPair
24 from .aranges
import ARanges
25 from .namelut
import NameLUT
26 from .dwarf_util
import _get_base_offset
# Descriptor for a single debug section.
#
# Fields:
#   stream:         a stream object containing the data of this section
#   name:           section name in the container file
#   global_offset:  the global offset of the section in its container file
#   size:           the size of the section's data, in bytes
#   address:        the virtual address for the section's data
#
# 'name' and 'global_offset' are descriptive only and aren't strictly
# required for the DWARF parsing to work. 'address' is required to properly
# decode the special '.eh_frame' format.
#
DebugSectionDescriptor = namedtuple(
    'DebugSectionDescriptor',
    ['stream', 'name', 'global_offset', 'size', 'address'])
# Configuration parameters for the DWARF reader. Keeping them in their own
# object allows DWARFInfo to be independent from any specific file
# format/container.
#
# little_endian:
#   boolean flag specifying whether the data in the file is little endian
#
# machine_arch:
#   Machine architecture as a string. For example 'x86' or 'x64'
#
# default_address_size:
#   The default address size for the container file (sizeof pointer, in bytes)
#
DwarfConfig = namedtuple(
    'DwarfConfig',
    ['little_endian', 'machine_arch', 'default_address_size'])
61 class DWARFInfo(object):
62 """ Acts also as a "context" to other major objects, bridging between
        various parts of the debug information.
79 debug_str_offsets_sec
,
82 debug_rnglists_sec
): # Not parsed for now
87 DebugSectionDescriptor for a section. Pass None for sections
88 that don't exist. These arguments are best given with
92 self
.debug_info_sec
= debug_info_sec
93 self
.debug_aranges_sec
= debug_aranges_sec
94 self
.debug_abbrev_sec
= debug_abbrev_sec
95 self
.debug_frame_sec
= debug_frame_sec
96 self
.eh_frame_sec
= eh_frame_sec
97 self
.debug_str_sec
= debug_str_sec
98 self
.debug_loc_sec
= debug_loc_sec
99 self
.debug_ranges_sec
= debug_ranges_sec
100 self
.debug_line_sec
= debug_line_sec
101 self
.debug_addr_sec
= debug_addr_sec
102 self
.debug_str_offsets_sec
= debug_str_offsets_sec
103 self
.debug_line_str_sec
= debug_line_str_sec
104 self
.debug_pubtypes_sec
= debug_pubtypes_sec
105 self
.debug_pubnames_sec
= debug_pubnames_sec
106 self
.debug_loclists_sec
= debug_loclists_sec
107 self
.debug_rnglists_sec
= debug_rnglists_sec
109 # This is the DWARFStructs the context uses, so it doesn't depend on
110 # DWARF format and address_size (these are determined per CU) - set them
112 self
.structs
= DWARFStructs(
113 little_endian
=self
.config
.little_endian
,
115 address_size
=self
.config
.default_address_size
)
117 # Cache for abbrev tables: a dict keyed by offset
118 self
._abbrevtable
_cache
= {}
120 # Cache of compile units and map of their offsets for bisect lookup.
121 # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at().
123 self
._cu
_offsets
_map
= []
def has_debug_info(self):
    """ Tell whether debug information is present.

        The answer is the truthiness of the .debug_info section descriptor.
        It can be False when the ELF only contains .eh_frame, which is
        encoded DWARF but not actually for debugging.
    """
    return True if self.debug_info_sec else False
def get_DIE_from_lut_entry(self, lut_entry):
    """ Get the DIE from the pubnames or pubtypes lookup table entry.

        lut_entry:
            A NameLUTEntry object from a NameLUT instance (see
            .get_pubnames and .get_pubtypes methods).
    """
    # Resolve the owning CU first, then look the DIE up inside it.
    owning_cu = self.get_CU_at(lut_entry.cu_ofs)
    die_offset = lut_entry.die_ofs
    return self.get_DIE_from_refaddr(die_offset, owning_cu)
def get_DIE_from_refaddr(self, refaddr, cu=None):
    """ Given a .debug_info section offset of a DIE, return the DIE.

        refaddr:
            The refaddr may come from a DW_FORM_ref_addr attribute.

        cu:
            The compile unit object, if known. If None a search
            from the closest offset less than refaddr will be performed.
    """
    # Honor a caller-supplied CU; only fall back to the (potentially
    # expensive) search when none was given.
    if cu is None:
        cu = self.get_CU_containing(refaddr)
    return cu.get_DIE_from_refaddr(refaddr)
def get_CU_containing(self, refaddr):
    """ Find the CU that includes the given reference address in the
        .debug_info section.

        refaddr:
            Either a refaddr of a DIE (possibly from a DW_FORM_ref_addr
            attribute) or the section offset of a CU (possibly from an
            accelerated access table).

        This function will parse and cache CUs until the search criteria
        is met, starting from the closest known offset less than or equal
        to the given address.

        Raises ValueError if no CU covering refaddr is found.
    """
    dwarf_assert(
        self.has_debug_info(),
        'CU lookup but no debug info section')
    dwarf_assert(
        0 <= refaddr < self.debug_info_sec.size,
        "refaddr %s beyond .debug_info size" % refaddr)

    # The CU containing the DIE we desire will be to the right of the
    # DIE insert point. If we have a CU address, then it will be a
    # match but the right insert minus one will still be the item.
    # The first CU starts at offset 0, so start there if cache is empty.
    i = bisect_right(self._cu_offsets_map, refaddr)
    start = self._cu_offsets_map[i - 1] if i > 0 else 0

    # parse CUs until we find one containing the desired address
    for cu in self._parse_CUs_iter(start):
        if cu.cu_offset <= refaddr < cu.cu_offset + cu.size:
            return cu

    raise ValueError("CU for reference address %s not found" % refaddr)
def get_CU_at(self, offset):
    """ Given a CU header offset, return the parsed CU.

        offset:
            The offset may be from an accelerated access table such as
            the public names, public types, or address range table.

        This function will directly parse the CU doing no validation of
        the offset beyond checking the size of the .debug_info section.
    """
    dwarf_assert(
        self.has_debug_info(),
        'CU lookup but no debug info section')
    dwarf_assert(
        0 <= offset < self.debug_info_sec.size,
        "offset %s beyond .debug_info size" % offset)

    return self._cached_CU_at_offset(offset)
def iter_CUs(self):
    """ Yield all the compile units (CompileUnit objects) in the debug info
        section.
    """
    return self._parse_CUs_iter()
217 def get_abbrev_table(self
, offset
):
218 """ Get an AbbrevTable from the given offset in the debug_abbrev
221 The only verification done on the offset is that it's within the
222 bounds of the section (if not, an exception is raised).
223 It is the caller's responsibility to make sure the offset actually
224 points to a valid abbreviation table.
226 AbbrevTable objects are cached internally (two calls for the same
227 offset will return the same object).
230 offset
< self
.debug_abbrev_sec
.size
,
231 "Offset '0x%x' to abbrev table out of section bounds" % offset
)
232 if offset
not in self
._abbrevtable
_cache
:
233 self
._abbrevtable
_cache
[offset
] = AbbrevTable(
234 structs
=self
.structs
,
235 stream
=self
.debug_abbrev_sec
.stream
,
237 return self
._abbrevtable
_cache
[offset
]
def get_string_from_table(self, offset):
    """ Obtain a string from the string table section (debug_str_sec),
        given an offset relative to the section.
    """
    str_stream = self.debug_str_sec.stream
    return parse_cstring_from_stream(str_stream, offset)
def get_string_from_linetable(self, offset):
    """ Obtain a string from the line string table section
        (debug_line_str_sec), given an offset relative to the section.
    """
    line_str_stream = self.debug_line_str_sec.stream
    return parse_cstring_from_stream(line_str_stream, offset)
def line_program_for_CU(self, CU):
    """ Given a CU object, fetch the line program it points to from the
        .debug_line section.
        If the CU doesn't point to a line program, return None.

        Note about directory and file names. They are returned as two collections
        in the lineprogram object's header - include_directory and file_entry.

        In DWARFv5, they have introduced a different, extensible format for those
        collections. So in a lineprogram v5+, there are two more collections in
        the header - directories and file_names. Those might contain extra DWARFv5
        information that is not exposed in include_directory and file_entry.
    """
    # The line program is pointed to by the DW_AT_stmt_list attribute of
    # the top DIE of a CU.
    top_attrs = CU.get_top_DIE().attributes
    if 'DW_AT_stmt_list' not in top_attrs:
        return None
    stmt_list_offset = top_attrs['DW_AT_stmt_list'].value
    return self._parse_line_program_at_offset(stmt_list_offset, CU.structs)
def has_CFI(self):
    """ Does this dwarf info have a dwarf_frame CFI section?
    """
    return self.debug_frame_sec is not None
def CFI_entries(self):
    """ Get a list of dwarf_frame CFI entries from the .debug_frame section.
    """
    frame_sec = self.debug_frame_sec
    cfi = CallFrameInfo(
        stream=frame_sec.stream,
        size=frame_sec.size,
        address=frame_sec.address,
        base_structs=self.structs)
    return cfi.get_entries()
def has_EH_CFI(self):
    """ Does this dwarf info have a eh_frame CFI section?
    """
    eh_sec = self.eh_frame_sec
    return not (eh_sec is None)
293 def EH_CFI_entries(self
):
294 """ Get a list of eh_frame CFI entries from the .eh_frame section.
297 stream
=self
.eh_frame_sec
.stream
,
298 size
=self
.eh_frame_sec
.size
,
299 address
=self
.eh_frame_sec
.address
,
300 base_structs
=self
.structs
,
302 return cfi
.get_entries()
304 def get_pubtypes(self
):
306 Returns a NameLUT object that contains information read from the
307 .debug_pubtypes section in the ELF file.
309 NameLUT is essentially a dictionary containing the CU/DIE offsets of
310 each symbol. See the NameLUT doc string for more details.
313 if self
.debug_pubtypes_sec
:
314 return NameLUT(self
.debug_pubtypes_sec
.stream
,
315 self
.debug_pubtypes_sec
.size
,
320 def get_pubnames(self
):
322 Returns a NameLUT object that contains information read from the
323 .debug_pubnames section in the ELF file.
325 NameLUT is essentially a dictionary containing the CU/DIE offsets of
326 each symbol. See the NameLUT doc string for more details.
329 if self
.debug_pubnames_sec
:
330 return NameLUT(self
.debug_pubnames_sec
.stream
,
331 self
.debug_pubnames_sec
.size
,
336 def get_aranges(self
):
337 """ Get an ARanges object representing the .debug_aranges section of
338 the DWARF data, or None if the section doesn't exist
340 if self
.debug_aranges_sec
:
341 return ARanges(self
.debug_aranges_sec
.stream
,
342 self
.debug_aranges_sec
.size
,
def location_lists(self):
    """ Get a LocationLists object representing the .debug_loc/debug_loclists section of
        the DWARF data, or None if this section doesn't exist.

        If both sections exist, it returns a LocationListsPair.
    """
    if self.debug_loclists_sec and self.debug_loc_sec is None:
        return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self)
    elif self.debug_loc_sec and self.debug_loclists_sec is None:
        return LocationLists(self.debug_loc_sec.stream, self.structs, 4, self)
    elif self.debug_loc_sec and self.debug_loclists_sec:
        # The pair takes the .debug_loc stream first and the .debug_loclists
        # stream second, the same ordering range_lists() uses for
        # RangeListsPair. Passing the loclists stream twice would drop the
        # .debug_loc data.
        return LocationListsPair(
            self.debug_loc_sec.stream,
            self.debug_loclists_sec.stream,
            self.structs,
            self)
    else:
        return None
def range_lists(self):
    """ Get a RangeLists object representing the .debug_ranges/.debug_rnglists section of
        the DWARF data, or None if this section doesn't exist.

        If both sections exist, it returns a RangeListsPair.
    """
    ranges_sec = self.debug_ranges_sec
    rnglists_sec = self.debug_rnglists_sec

    # Only .debug_rnglists present
    if rnglists_sec and ranges_sec is None:
        return RangeLists(rnglists_sec.stream, self.structs, 5, self)
    # Only .debug_ranges present
    if ranges_sec and rnglists_sec is None:
        return RangeLists(ranges_sec.stream, self.structs, 4, self)
    # Both present
    if ranges_sec and rnglists_sec:
        return RangeListsPair(
            ranges_sec.stream, rnglists_sec.stream, self.structs, self)
    return None
def get_addr(self, cu, addr_index):
    """Provided a CU and an index, retrieves an address from the debug_addr section
    """
    if not self.debug_addr_sec:
        raise DWARFError('The file does not contain a debug_addr section for indirect address access')
    # Selectors are not supported, but no assert on that. TODO?
    cu_addr_base = _get_base_offset(cu, 'DW_AT_addr_base')
    addr_struct = cu.structs.Dwarf_target_addr('')
    addr_stream = self.debug_addr_sec.stream
    # Entry offset = the CU's address base + index * the CU's address size.
    entry_offset = cu_addr_base + addr_index * cu.header.address_size
    return struct_parse(addr_struct, addr_stream, entry_offset)
386 #------ PRIVATE ------#
388 def _parse_CUs_iter(self
, offset
=0):
389 """ Iterate CU objects in order of appearance in the debug_info section.
392 The offset of the first CU to yield. Additional iterations
393 will return the sequential unit objects.
395 See .iter_CUs(), .get_CU_containing(), and .get_CU_at().
397 if self
.debug_info_sec
is None:
400 while offset
< self
.debug_info_sec
.size
:
401 cu
= self
._cached
_CU
_at
_offset
(offset
)
402 # Compute the offset of the next CU in the section. The unit_length
403 # field of the CU header contains its size not including the length
407 cu
.structs
.initial_length_field_size())
410 def _cached_CU_at_offset(self
, offset
):
411 """ Return the CU with unit header at the given offset into the
412 debug_info section from the cache. If not present, the unit is
413 header is parsed and the object is installed in the cache.
416 The offset of the unit header in the .debug_info section
417 to of the unit to fetch from the cache.
421 # Find the insert point for the requested offset. With bisect_right,
422 # if this entry is present in the cache it will be the prior entry.
423 i
= bisect_right(self
._cu
_offsets
_map
, offset
)
424 if i
>= 1 and offset
== self
._cu
_offsets
_map
[i
- 1]:
425 return self
._cu
_cache
[i
- 1]
427 # Parse the CU and insert the offset and object into the cache.
428 # The ._cu_offsets_map[] contains just the numeric offsets for the
429 # bisect_right search while the parallel indexed ._cu_cache[] holds
430 # the object references.
431 cu
= self
._parse
_CU
_at
_offset
(offset
)
432 self
._cu
_offsets
_map
.insert(i
, offset
)
433 self
._cu
_cache
.insert(i
, cu
)
436 def _parse_CU_at_offset(self
, offset
):
437 """ Parse and return a CU at the given offset in the debug_info stream.
439 # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3
440 # states that the first 32-bit word of the CU header determines
441 # whether the CU is represented with 32-bit or 64-bit DWARF format.
443 # So we peek at the first word in the CU header to determine its
444 # dwarf format. Based on it, we then create a new DWARFStructs
445 # instance suitable for this CU and use it to parse the rest.
447 initial_length
= struct_parse(
448 self
.structs
.Dwarf_uint32(''), self
.debug_info_sec
.stream
, offset
)
449 dwarf_format
= 64 if initial_length
== 0xFFFFFFFF else 32
452 # Temporary structs for parsing the header
453 # The structs for the rest of the CU depend on the header data.
455 cu_structs
= DWARFStructs(
456 little_endian
=self
.config
.little_endian
,
457 dwarf_format
=dwarf_format
,
461 cu_header
= struct_parse(
462 cu_structs
.Dwarf_CU_header
, self
.debug_info_sec
.stream
, offset
)
464 # structs for the rest of the CU, taking into account bitness and DWARF version
465 cu_structs
= DWARFStructs(
466 little_endian
=self
.config
.little_endian
,
467 dwarf_format
=dwarf_format
,
468 address_size
=cu_header
['address_size'],
469 dwarf_version
=cu_header
['version'])
471 cu_die_offset
= self
.debug_info_sec
.stream
.tell()
473 self
._is
_supported
_version
(cu_header
['version']),
474 "Expected supported DWARF version. Got '%s'" % cu_header
['version'])
480 cu_die_offset
=cu_die_offset
)
482 def _is_supported_version(self
, version
):
483 """ DWARF version supported by this parser
485 return 2 <= version
<= 5
487 def _parse_line_program_at_offset(self
, debug_line_offset
, structs
):
488 """ Given an offset to the .debug_line section, parse the line program
489 starting at this offset in the section and return it.
490 structs is the DWARFStructs object used to do this parsing.
492 lineprog_header
= struct_parse(
493 structs
.Dwarf_lineprog_header
,
494 self
.debug_line_sec
.stream
,
497 # DWARF5: resolve names
498 def resolve_strings(self
, lineprog_header
, format_field
, data_field
):
499 if lineprog_header
.get(format_field
, False):
500 data
= lineprog_header
[data_field
]
501 for field
in lineprog_header
[format_field
]:
502 def replace_value(data
, content_type
, replacer
):
504 entry
[content_type
] = replacer(entry
[content_type
])
506 if field
.form
== 'DW_FORM_line_strp':
507 replace_value(data
, field
.content_type
, self
.get_string_from_linetable
)
508 elif field
.form
== 'DW_FORM_strp':
509 replace_value(data
, field
.content_type
, self
.get_string_from_table
)
510 elif field
.form
in ('DW_FORM_strp_sup', 'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4'):
511 raise NotImplementedError()
513 resolve_strings(self
, lineprog_header
, 'directory_entry_format', 'directories')
514 resolve_strings(self
, lineprog_header
, 'file_name_entry_format', 'file_names')
516 # DWARF5: provide compatible file/directory name arrays for legacy lineprogram consumers
517 if lineprog_header
.get('directories', False):
518 lineprog_header
.include_directory
= tuple(d
.DW_LNCT_path
for d
in lineprog_header
.directories
)
519 if lineprog_header
.get('file_names', False):
520 lineprog_header
.file_entry
= tuple(
522 'name':e
.get('DW_LNCT_path'),
523 'dir_index': e
.get('DW_LNCT_directory_index'),
524 'mtime': e
.get('DW_LNCT_timestamp'),
525 'length': e
.get('DW_LNCT_size')})
526 for e
in lineprog_header
.file_names
)
528 # Calculate the offset to the next line program (see DWARF 6.2.4)
529 end_offset
= ( debug_line_offset
+ lineprog_header
['unit_length'] +
530 structs
.initial_length_field_size())
533 header
=lineprog_header
,
534 stream
=self
.debug_line_sec
.stream
,
536 program_start_offset
=self
.debug_line_sec
.stream
.tell(),
537 program_end_offset
=end_offset
)