test/run_readelf_tests.py

   1 #!/usr/bin/env python
   2 #-------------------------------------------------------------------------------
   3 # test/run_readelf_tests.py
   4 #
   5 # Automatic test runner for elftools & readelf
   6 #
   7 # Eli Bendersky (eliben@gmail.com)
   8 # This code is in the public domain
   9 #-------------------------------------------------------------------------------
  10 import argparse
  11 from difflib import SequenceMatcher
  12 import logging
  13 from multiprocessing import Pool
  14 import os
  15 import platform
  16 import re
  17 import sys
  18 import time
  19
  20 from utils import run_exe, is_in_rootdir, dump_output_to_temp_files
  21
  22 # Make it possible to run this file from the root dir of pyelftools without
  23 # installing pyelftools; useful for CI testing, etc.
  24 sys.path[0:0] = ['.']
  25
  26 # Create a global logger object
  27 testlog = logging.getLogger('run_tests')
  28 testlog.setLevel(logging.DEBUG)
  29 testlog.addHandler(logging.StreamHandler(sys.stdout))
  30
  31 # Set the path for calling readelf. We carry our own version of readelf around,
  32 # because binutils tend to change its output even between daily builds of the
  33 # same minor release and keeping track is a headache.
  34 if platform.system() == "Darwin": # MacOS
  35     READELF_PATH = 'greadelf'
  36 elif platform.system() == "Windows":
  37     # Point the environment variable READELF at Cygwin's readelf.exe, or some other Windows build
  38     READELF_PATH = os.environ.get('READELF', "readelf.exe")
  39 else:
  40     READELF_PATH = 'test/external_tools/readelf'
  41     if not os.path.exists(READELF_PATH):
  42         READELF_PATH = 'readelf'
  43
  44
  45 def discover_testfiles(rootdir):
  46     """ Discover test files in the given directory. Yield them one by one.
  47     """
  48     for filename in os.listdir(rootdir):
  49         _, ext = os.path.splitext(filename)
  50         if ext == '.elf':
  51             yield os.path.join(rootdir, filename)
  52
  53
  54 def run_test_on_file(filename, verbose=False, opt=None):
  55     """ Runs a test on the given input filename. Return True if all test
  56         runs succeeded.
  57         If opt is specified, rather that going over the whole
  58         set of supported readelf options, the test will only
  59         run for one option.
  60     """
  61     success = True
  62     testlog.info("Test file '%s'" % filename)
  63     if opt is None:
  64         options = [
  65             '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V',
  66             '--debug-dump=info', '--debug-dump=decodedline',
  67             '--debug-dump=frames', '--debug-dump=frames-interp',
  68             '--debug-dump=aranges', '--debug-dump=pubtypes',
  69             '--debug-dump=pubnames', '--debug-dump=loc',
  70             '--debug-dump=Ranges'
  71             ]
  72     else:
  73         options = [opt]
  74
  75     for option in options:
  76         if verbose: testlog.info("..option='%s'" % option)
  77
  78         # TODO(zlobober): this is a dirty hack to make tests work for ELF core
  79         # dump notes. Making it work properly requires a pretty deep
  80         # investigation of how original readelf formats the output.
  81         if "core" in filename and option == "-n":
  82             if verbose:
  83                 testlog.warning("....will fail because corresponding part of readelf.py is not implemented yet")
  84                 testlog.info('.......................SKIPPED')
  85             continue
  86
  87         # sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
  88         # patched from 0x07 0x10 to 00 00.
  89         # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
  90         # from "DW_CFA_undefined 16" to two NOPs.
  91         # GNU readelf had a bug here, had to work around. See PR #411.
  92
  93         # stdouts will be a 2-element list: output of readelf and output
  94         # of scripts/readelf.py
  95         stdouts = []
  96         for exe_path in [READELF_PATH, 'scripts/readelf.py']:
  97             args = [option, filename]
  98             if verbose: testlog.info("....executing: '%s %s'" % (
  99                 exe_path, ' '.join(args)))
 100             t1 = time.time()
 101             rc, stdout = run_exe(exe_path, args)
 102             if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
 103             if rc != 0:
 104                 testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc))
 105                 return False
 106             stdouts.append(stdout)
 107         if verbose: testlog.info('....comparing output...')
 108         t1 = time.time()
 109         rc, errmsg = compare_output(*stdouts)
 110         if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
 111         if rc:
 112             if verbose: testlog.info('.......................SUCCESS')
 113         else:
 114             success = False
 115             testlog.info('.......................FAIL')
 116             testlog.info('....for file %s' % filename)
 117             testlog.info('....for option "%s"' % option)
 118             testlog.info('....Output #1 is readelf, Output #2 is pyelftools')
 119             testlog.info('@@ ' + errmsg)
 120             dump_output_to_temp_files(testlog, *stdouts)
 121     return success
 122
 123
 124 def compare_output(s1, s2):
 125     """ Compare stdout strings s1 and s2.
 126         s1 is from readelf, s2 from elftools readelf.py
 127         Return pair success, errmsg. If comparison succeeds, success is True
 128         and errmsg is empty. Otherwise success is False and errmsg holds a
 129         description of the mismatch.
 130
 131         Note: this function contains some rather horrible hacks to ignore
 132         differences which are not important for the verification of pyelftools.
 133         This is due to some intricacies of binutils's readelf which pyelftools
 134         doesn't currently implement, features that binutils doesn't support,
 135         or silly inconsistencies in the output of readelf, which I was reluctant
 136         to replicate. Read the documentation for more details.
 137     """
 138     def prepare_lines(s):
 139         return [line for line in s.lower().splitlines() if line.strip() != '']
 140
 141     lines1 = prepare_lines(s1)
 142     lines2 = prepare_lines(s2)
 143
 144     flag_in_debug_line_section = False
 145
 146     if len(lines1) != len(lines2):
 147         return False, 'Number of lines different: %s vs %s' % (
 148                 len(lines1), len(lines2))
 149
 150     # Position of the View column in the output file, if parsing readelf..decodedline
 151     # output, and the GNU readelf output contains the View column. Otherwise stays -1.
 152     view_col_position = -1
 153     for i in range(len(lines1)):
 154         if lines1[i].endswith('debug_line section:'):
 155             # .debug_line or .zdebug_line
 156             flag_in_debug_line_section = True
 157
 158         # readelf spelling error for GNU property notes
 159         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
 160
 161         # The view column position may change from CU to CU:
 162         if view_col_position >= 0 and lines1[i].startswith('cu:'):
 163             view_col_position = -1
 164
 165         # Check if readelf..decodedline output line contains the view column
 166         if flag_in_debug_line_section and lines1[i].startswith('file name') and view_col_position < 0:
 167             view_col_position = lines1[i].find("view")
 168             stmt_col_position = lines1[i].find("stmt")
 169
 170         # Excise the View column from the table, if any.
 171         # View_col_position is only set to a nonzero number if one of the previous
 172         # lines was a table header line with a "view" in it.
 173         # We assume careful formatting on GNU readelf's part - View column values
 174         # are not out of line with the View header.
 175         if view_col_position >= 0 and not lines1[i].endswith(':'):
 176             lines1[i] = lines1[i][:view_col_position] + lines1[i][stmt_col_position:]
 177
 178         # Compare ignoring whitespace
 179         lines1_parts = lines1[i].split()
 180         lines2_parts = lines2[i].split()
 181
 182         if ''.join(lines1_parts) != ''.join(lines2_parts):
 183             ok = False
 184
 185             try:
 186                 # Ignore difference in precision of hex representation in the
 187                 # last part (i.e. 008f3b vs 8f3b)
 188                 if (''.join(lines1_parts[:-1]) == ''.join(lines2_parts[:-1]) and
 189                     int(lines1_parts[-1], 16) == int(lines2_parts[-1], 16)):
 190                     ok = True
 191             except ValueError:
 192                 pass
 193
 194             sm = SequenceMatcher()
 195             sm.set_seqs(lines1[i], lines2[i])
 196             changes = sm.get_opcodes()
 197             if '[...]' in lines1[i]:
 198                 # Special case truncations with ellipsis like these:
 199                 #     .note.gnu.bu[...]        redelf
 200                 #     .note.gnu.build-i        pyelftools
 201                 # Or more complex for symbols with versions, like these:
 202                 #     _unw[...]@gcc_3.0        readelf
 203                 #     _unwind_resume@gcc_3.0   pyelftools
 204                 for p1, p2 in zip(lines1_parts, lines2_parts):
 205                     dots_start = p1.find('[...]')
 206                     if dots_start != -1:
 207                         break
 208                 ok = p1.endswith('[...]') and p1[:dots_start] == p2[:dots_start]
 209                 if not ok:
 210                     dots_end = dots_start + 5
 211                     if len(p1) > dots_end and p1[dots_end] == '@':
 212                         ok = (    p1[:dots_start] == p2[:dots_start]
 213                               and p1[p1.rfind('@'):] == p2[p2.rfind('@'):])
 214             elif 'at_const_value' in lines1[i]:
 215                 # On 32-bit machines, readelf doesn't correctly represent
 216                 # some boundary LEB128 numbers
 217                 val = lines2_parts[-1]
 218                 num2 = int(val, 16 if val.startswith('0x') else 10)
 219                 if num2 <= -2**31 and '32' in platform.architecture()[0]:
 220                     ok = True
 221             elif 'os/abi' in lines1[i]:
 222                 if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]:
 223                     ok = True
 224             elif len(lines1_parts) == 3 and lines1_parts[2] == 'nt_gnu_property_type_0':
 225                 # readelf does not seem to print a readable description for this
 226                 ok = lines1_parts == lines2_parts[:3]
 227             else:
 228                 for s in ('t (tls)', 'l (large)', 'd (mbind)'):
 229                     if s in lines1[i] or s in lines2[i]:
 230                         ok = True
 231                         break
 232             if not ok:
 233                 errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n (%r)' % (
 234                     i, lines1[i], lines2[i], changes)
 235                 return False, errmsg
 236     return True, ''
 237
 238
 239 def main():
 240     if not is_in_rootdir():
 241         testlog.error('Error: Please run me from the root dir of pyelftools!')
 242         return 1
 243
 244     argparser = argparse.ArgumentParser(
 245         usage='usage: %(prog)s [options] [file] [file] ...',
 246         prog='run_readelf_tests.py')
 247     argparser.add_argument('files', nargs='*', help='files to run tests on')
 248     argparser.add_argument(
 249         '--parallel', action='store_true',
 250         help='run tests in parallel; always runs all tests w/o verbose')
 251     argparser.add_argument('-V', '--verbose',
 252                            action='store_true', dest='verbose',
 253                            help='verbose output')
 254     argparser.add_argument(
 255         '-k', '--keep-going',
 256         action='store_true', dest='keep_going',
 257         help="Run all tests, don't stop at the first failure")
 258     argparser.add_argument('--opt',
 259         action='store', dest='opt', metavar='<readelf-option>',
 260         help= 'Limit the test one one readelf option.')
 261     args = argparser.parse_args()
 262
 263     if args.parallel:
 264         if args.verbose or args.keep_going == False:
 265             print('WARNING: parallel mode disables verbosity and always keeps going')
 266
 267     if args.verbose:
 268         testlog.info('Running in verbose mode')
 269         testlog.info('Python executable = %s' % sys.executable)
 270         testlog.info('readelf path = %s' % READELF_PATH)
 271         testlog.info('Given list of files: %s' % args.files)
 272
 273     # If file names are given as command-line arguments, only these files
 274     # are taken as inputs. Otherwise, autodiscovery is performed.
 275     if len(args.files) > 0:
 276         filenames = args.files
 277     else:
 278         filenames = sorted(discover_testfiles('test/testfiles_for_readelf'))
 279
 280     if len(filenames) > 1 and args.parallel:
 281         pool = Pool()
 282         results = pool.map(run_test_on_file, filenames)
 283         failures = results.count(False)
 284     else:
 285         failures = 0
 286         for filename in filenames:
 287             if not run_test_on_file(filename, args.verbose, args.opt):
 288                 failures += 1
 289                 if not args.keep_going:
 290                     break
 291
 292     if failures == 0:
 293         testlog.info('\nConclusion: SUCCESS')
 294         return 0
 295     elif args.keep_going:
 296         testlog.info('\nConclusion: FAIL ({}/{})'.format(
 297             failures, len(filenames)))
 298         return 1
 299     else:
 300         testlog.info('\nConclusion: FAIL')
 301         return 1
 302
 303
# Run main() only when this file is executed as a script; the value returned
# by main() is handed to sys.exit.
if __name__ == '__main__':
    sys.exit(main())