False positive on LocationParser.attribute_has_location() (#501)
author Seva Alekseyev <sevaa@yarxi.ru>
Thu, 14 Sep 2023 13:27:12 +0000 (09:27 -0400)
committer GitHub <noreply@github.com>
Thu, 14 Sep 2023 13:27:12 +0000 (06:27 -0700)
* Fix for LocationParser.attribute_has_location

* Test for DWARFv3, section 5.6.6

* V5 counterparts to GNU attributes that may be locations

* Test for the false negative

* Test explanation

.gitignore
elftools/dwarf/locationlists.py
test/test_dwarf_locationattr.py [new file with mode: 0644]

index 999a8e4549df4eef756b0c8deee4d2c956b5b888..44b6e44d3857ae05fb133bf4313d55abc4d58130 100644 (file)
@@ -9,6 +9,7 @@ MANIFEST
 *.sublime-workspace
 *.egg-info
 .vscode
+temp
 
 
 
index 9a349779baa02eecf260f846392a2edcd50aa65b..6397738293e9fa636f3bcbb91182ef0c5d715907 100644 (file)
@@ -325,10 +325,19 @@ class LocationParser(object):
 
     @staticmethod
     def _attribute_has_loc_list(attr, dwarf_version):
-        return ((dwarf_version < 4 and
+        return (((dwarf_version < 4 and
                  attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8') and
                  not attr.name == 'DW_AT_const_value') or
-                attr.form in ('DW_FORM_sec_offset', 'DW_FORM_loclistx'))
+                attr.form in ('DW_FORM_sec_offset', 'DW_FORM_loclistx')) and
+                not LocationParser._attribute_is_member_offset(attr, dwarf_version))
+    
+    # Starting with DWARF3, DW_AT_data_member_location may contain an integer offset
+    # instead of a location expression. Need to prevent false positives on attribute_has_location().
+    @staticmethod
+    def _attribute_is_member_offset(attr, dwarf_version):
+        return (dwarf_version >= 3 and
+            attr.name == 'DW_AT_data_member_location' and
+            attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sdata', 'DW_FORM_udata'))
 
     @staticmethod
     def _attribute_is_loclistptr_class(attr):
@@ -341,4 +350,8 @@ class LocationParser(object):
                                'DW_AT_call_value',
                                'DW_AT_GNU_call_site_value',
                                'DW_AT_GNU_call_site_target',
-                               'DW_AT_GNU_call_site_data_value'))
+                               'DW_AT_GNU_call_site_data_value',
+                               'DW_AT_call_target',
+                               'DW_AT_call_target_clobbered',
+                               'DW_AT_call_data_location',
+                               'DW_AT_call_data_value'))
diff --git a/test/test_dwarf_locationattr.py b/test/test_dwarf_locationattr.py
new file mode 100644 (file)
index 0000000..6d764d2
--- /dev/null
@@ -0,0 +1,40 @@
+#-------------------------------------------------------------------------------
+# elftools tests
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import unittest
+
+from elftools.dwarf.locationlists import LocationParser
+from elftools.dwarf.die import AttributeValue
+
+class TestLocationAttrubute(unittest.TestCase):
+    def test_has_location(self):
+        # This attribute comes from a DWARFv3 binary that doesn't have a location lists
+        # section. Before the patch, pyelftools would interpret it as an attribute with a
+        # location, more specifically with a location list offset (as opposed to an expression).
+        # Meanwhile, by the spec, DW_AT_data_member_location is not even capable
+        # of storing a location list offset (since structure layout
+        # can't vary by code location). DWARFv3 spec also provides that DW_AT_data_member_location
+        # may be a small integer with an offset from the structure's base address, and that
+        # seems to be the case here. Ergo, pyelftools should not treat this attribute as a location.
+        # Since the location/loclist parse function uses the same check, this fix will
+        # prevent such attribute values from being misparsed, also.
+        #
+        # The notion that member location in a structure had to be a DWARF expression
+        # was a misconception all along - how often does one see a compound datatype
+        # with a static member set but a dynamic layout?
+        attr = AttributeValue(name='DW_AT_data_member_location', form='DW_FORM_data1', value=0, raw_value=0, offset=402, indirection_length=0)
+        self.assertFalse(LocationParser.attribute_has_location(attr, 3))
+
+        # This attribute comes from a DWARFv5 binary. Its form unambiguously tells us it's a
+        # location expression. Before the patch, pyelftools would not recognize it as one,
+        # because it has a hard-coded list of attributes that may contain a location, and
+        # DW_AT_call_target was not in that list.
+        attr = AttributeValue(name='DW_AT_call_target', form='DW_FORM_exprloc', value=[80], raw_value=[80], offset=8509, indirection_length=0)
+        self.assertTrue(LocationParser.attribute_has_location(attr, 5))
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file