0986a385c1a555f987702ad13e3d8c26f60eeef8
[pyelftools.git] / elftools / dwarf / datatype_cpp.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/datatype_cpp.py
3 #
4 # First draft at restoring the source level name of a C/C++ datatype
5 # from DWARF data. Aiming at compatibility with llvm-dwarfdump v15.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..common.py3compat import bytes2str
11
# Maps a modifier name, as stored in TypeDesc.modifiers, to the text
# emitted for it when a TypeDesc is converted to a string.
cpp_symbols = {
    "pointer": "*",
    "reference": "&",
    "const": "const",
}
16
def describe_cpp_datatype(var_die):
    """Return the datatype of var_die as a string.

    Thin wrapper: the DIE is parsed with parse_cpp_datatype and the
    result is converted with str().
    """
    type_desc = parse_cpp_datatype(var_die)
    return str(type_desc)
19
def parse_cpp_datatype(var_die):
    """Given a DIE that describes a variable, a parameter, or a member
    with DW_AT_type in it, parses the datatype that DW_AT_type refers to.

    Returns a TypeDesc; call str() on it to get the C++ spelling.
    If var_die has no DW_AT_type, the returned TypeDesc has an empty
    tag and no name.

    Does not follow typedefs and doesn't look into struct members.
    Not good for a debugger.
    """
    t = TypeDesc()

    if 'DW_AT_type' not in var_die.attributes:
        t.tag = ''
        return t

    type_die = var_die.get_DIE_from_attribute('DW_AT_type')

    mods = []
    # Unlike readelf, dwarfdump doesn't chase typedefs
    while type_die.tag in ('DW_TAG_const_type', 'DW_TAG_pointer_type', 'DW_TAG_reference_type'):
        modifier = _strip_type_tag(type_die) # const/reference/pointer
        # Each modifier is prepended, so mods ends up ordered from the
        # modifier nearest the underlying type to the one nearest var_die
        mods.insert(0, modifier)
        if 'DW_AT_type' not in type_die.attributes: # void* is encoded as a pointer to nothing
            t.name = t.tag = "void"
            t.modifiers = tuple(mods)
            return t
        type_die = type_die.get_DIE_from_attribute('DW_AT_type')

    # type_die is now the first DIE in the chain that is not a
    # const/pointer/reference modifier
    t.tag = _strip_type_tag(type_die)
    t.modifiers = tuple(mods)

    if t.tag in ('ptr_to_member', 'subroutine'):
        if t.tag == 'ptr_to_member':
            ptr_prefix = DIE_name(type_die.get_DIE_from_attribute('DW_AT_containing_type')) + "::"
            type_die = type_die.get_DIE_from_attribute('DW_AT_type')
        elif "DW_AT_object_pointer" in type_die.attributes: # Older compiler... Subroutine, but with an object pointer
            # The class name is taken from the type the object pointer points to
            ptr_prefix = DIE_name(DIE_type(DIE_type(type_die.get_DIE_from_attribute('DW_AT_object_pointer')))) + "::"
        else: # Not a pointer to member
            ptr_prefix = ''

        # NOTE(review): t.tag is not updated after the ptr_to_member branch
        # above, so for 'ptr_to_member' this block is skipped, ptr_prefix
        # goes unused, and the generic naming code at the bottom runs on
        # the pointed-to type. Confirm whether that is intended.
        if t.tag == 'subroutine':
            # Artificial parameters (e.g. an implicit "this") are left out
            params = ", ".join(
                format_function_param(p, p)
                for p in type_die.iter_children()
                if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters")
                and 'DW_AT_artificial' not in p.attributes)
            if 'DW_AT_type' in type_die.attributes:
                # The subroutine DIE's own DW_AT_type is the return type
                retval_type = parse_cpp_datatype(type_die)
                is_pointer = retval_type.modifiers and retval_type.modifiers[-1] == 'pointer'
                retval_type = str(retval_type)
                if not is_pointer:
                    retval_type += " "
            else:
                retval_type = "void "

            if mods and mods[-1] == 'pointer':
                # Function pointer: that pointer is rendered as the "(*)"
                # inside the name, so drop it from the modifiers
                mods.pop()
                t.modifiers = tuple(mods)
                t.name = "%s(%s*)(%s)" % (retval_type, ptr_prefix, params)
            else:
                t.name = "%s(%s)" % (retval_type, params)
            return t
    elif DIE_is_ptr_to_member_struct(type_die):
        dt = parse_cpp_datatype(next(type_die.iter_children())) # The first element is pfn, a function pointer with a this
        dt.modifiers = tuple(dt.modifiers[:-1]) # Pop the extra pointer
        dt.tag = "ptr_to_member_type" # Not a function pointer per se
        return dt
    elif t.tag == 'array':
        # Stored as a tuple rather than a generator: a generator is always
        # truthy and can be walked only once, so a second str() on the
        # TypeDesc would lose the dimensions
        t.dimensions = tuple(_array_subtype_size(sub)
            for sub
            in type_die.iter_children()
            if sub.tag == 'DW_TAG_subrange_type')
        # The array DIE's DW_AT_type is the element type
        t.name = describe_cpp_datatype(type_die)
        return t

    # Now the nonfunction types
    # Blank name is sometimes legal (unnamed unions, etc)

    t.name = safe_DIE_name(type_die, t.tag + " ")

    # Check the nesting - important for parameters
    parent = type_die.get_parent()
    scopes = []
    while parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'):
        # If unnamed scope, fall back to scope type - like "structure "
        scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " "))
        parent = parent.get_parent()
    t.scopes = tuple(scopes)

    return t
108
109 #--------------------------------------------------
110
class TypeDesc(object):
    """ Holds the pieces of a datatype description parsed from DWARF DIEs.
        Not enough to display the variable in the debugger, but enough
        to produce a type description string similar to those of llvm-dwarfdump.

        name - name for primitive datatypes, element name for arrays, the
            whole name for functions and function pointers

        modifiers - a collection of "const"/"pointer"/"reference", from the
            chain of DIEs preceding the real type DIE

        scopes - a collection of struct/class/namespace names, parents of the
            real type DIE

        tag - the tag of the real type DIE, stripped of initial DW_TAG_ and
            final _type

        dimensions - the collection of array dimensions, if the type is an
            array. -1 means an array of unknown dimension.

    """
    def __init__(self):
        self.tag = None
        self.name = None
        self.scopes = () # Reads left to right
        self.modifiers = () # Reads left to right
        self.dimensions = None

    def __str__(self):
        """Assemble the textual form: leading const(s), the scope-qualified
        name, the remaining modifier symbols, then the array dimensions.
        """
        # Some reference points from dwarfdump:
        # const->pointer->const->char = const char *const
        # const->reference->const->int = const const int &
        # const->reference->int = const int &
        remaining = self.modifiers
        pieces = []

        # A const at the very start of the modifiers is written before the name
        if remaining and remaining[0] == 'const':
            pieces.append("const")
            remaining = remaining[1:]

        # ref->const in the end, const goes in front
        if remaining[-2:] == ("reference", "const"):
            pieces.append("const")
            remaining = remaining[:-1]

        full_name = str(self.name)
        if self.scopes:
            full_name = "%s::%s" % ('::'.join(self.scopes), full_name)
        pieces.append(full_name)

        if remaining:
            pieces.append("".join(cpp_symbols[mod] for mod in remaining))

        # Dimensions that are not positive (e. g. -1 for unknown) render as []
        suffix = "".join(
            '[%s]' % (str(dim) if dim > 0 else '',)
            for dim in (self.dimensions or ()))

        return " ".join(pieces) + suffix
172
def DIE_name(die):
    """Return the DW_AT_name of die, passed through bytes2str.

    Raises KeyError if the DIE has no DW_AT_name attribute.
    """
    name_attr = die.attributes['DW_AT_name']
    return bytes2str(name_attr.value)
175
def safe_DIE_name(die, default = ''):
    """Return the DW_AT_name of die, passed through bytes2str, or
    default if the DIE has no DW_AT_name attribute.
    """
    if 'DW_AT_name' not in die.attributes:
        return default
    name_attr = die.attributes['DW_AT_name']
    return bytes2str(name_attr.value)
178
def DIE_type(die):
    """Return the DIE that the DW_AT_type attribute of die refers to."""
    referenced = die.get_DIE_from_attribute("DW_AT_type")
    return referenced
181
class ClassDesc(object):
    """ Describes the class/namespace context of a function, as filled in
        by get_class_spec_if_member.
    """
    def __init__(self):
        # True when the function is detected to be a const member
        self.const_member = False
        # Names of the enclosing scopes, outermost first
        self.scopes = ()
186
def get_class_spec_if_member(func_spec, the_func):
    """Work out the class/namespace context of a function.

    func_spec - the DIE whose parent chain is walked to find enclosing
        classes, structures and namespaces
    the_func - the DIE that is checked for DW_AT_object_pointer

    Returns a ClassDesc, or None if the function has no object pointer
    and is not nested in any class, structure or namespace.
    """
    if 'DW_AT_object_pointer' in the_func.attributes:
        # The type of the object pointer ("this") parameter names the class
        this_param = the_func.get_DIE_from_attribute('DW_AT_object_pointer')
        this_type = parse_cpp_datatype(this_param)
        class_spec = ClassDesc()
        class_spec.scopes = this_type.scopes + (this_type.name,)
        # const -> pointer -> const for this arg of const
        mods = this_type.modifiers
        class_spec.const_member = any(pair == ("const", "pointer")
            for pair in zip(mods, mods[1:]))
        return class_spec

    # Check the parent element chain - could be a class
    parent = func_spec.get_parent()

    scopes = []
    while parent.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"):
        # Unnamed scopes (e. g. anonymous namespaces) have no DW_AT_name;
        # fall back to the scope type, same as parse_cpp_datatype does,
        # rather than failing with a KeyError
        scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " "))
        parent = parent.get_parent()
    if scopes:
        cs = ClassDesc()
        cs.scopes = tuple(scopes)
        return cs

    return None
210
def format_function_param(param_spec, param):
    """Return the text for one entry of a function's parameter list.

    param_spec - the parameter DIE; a DW_TAG_formal_parameter is rendered
        as its datatype, anything else as "..."
    param - not used; kept so that existing callers passing two
        arguments keep working

    Only the type is rendered, the parameter name is not part of the result.
    """
    if param_spec.tag == 'DW_TAG_formal_parameter':
        return str(parse_cpp_datatype(param_spec))
    # unspecified_parameters AKA variadic
    return "..."
223
def DIE_is_ptr_to_member_struct(type_die):
    """Tell whether type_die is a structure with exactly two
    DW_TAG_member children, named __pfn and __delta, in that order.
    """
    if type_die.tag != 'DW_TAG_structure_type':
        return False

    fields = [child for child in type_die.iter_children() if child.tag == "DW_TAG_member"]
    if len(fields) != 2:
        return False

    first, second = fields
    return safe_DIE_name(first) == "__pfn" and safe_DIE_name(second) == "__delta"
229
230 def _strip_type_tag(die):
231 """Given a DIE with DW_TAG_foo_type, returns foo"""
232 return die.tag[7:-5]
233
234 def _array_subtype_size(sub):
235 if 'DW_AT_upper_bound' in sub.attributes:
236 return sub.attributes['DW_AT_upper_bound'].value + 1
237 if 'DW_AT_count' in sub.attributes:
238 return sub.attributes['DW_AT_count'].value
239 else:
240 return -1
241