test/run_readelf_tests.py

   1 #!/usr/bin/env python
   2 #-------------------------------------------------------------------------------
   3 # test/run_readelf_tests.py
   4 #
   5 # Automatic test runner for elftools & readelf
   6 #
   7 # Eli Bendersky (eliben@gmail.com)
   8 # This code is in the public domain
   9 #-------------------------------------------------------------------------------
  10 import argparse
  11 from difflib import SequenceMatcher
  12 import logging
  13 from multiprocessing import Pool
  14 import os
  15 import platform
  16 import re
  17 import sys
  18 import time
  19
  20 from utils import run_exe, is_in_rootdir, dump_output_to_temp_files
  21
  22 # Make it possible to run this file from the root dir of pyelftools without
  23 # installing pyelftools; useful for CI testing, etc.
  24 sys.path[0:0] = ['.']
  25
  26 # Create a global logger object
  27 testlog = logging.getLogger('run_tests')
  28 testlog.setLevel(logging.DEBUG)
  29 testlog.addHandler(logging.StreamHandler(sys.stdout))
  30
  31 # Set the path for calling readelf. We carry our own version of readelf around,
  32 # because binutils tend to change its output even between daily builds of the
  33 # same minor release and keeping track is a headache.
  34 if platform.system() == "Darwin": # MacOS
  35     READELF_PATH = 'greadelf'
  36 elif platform.system() == "Windows":
  37     # Point the environment variable READELF at Cygwin's readelf.exe, or some other Windows build
  38     READELF_PATH = os.environ.get('READELF', "readelf.exe")
  39 else:
  40     READELF_PATH = 'test/external_tools/readelf'
  41     if not os.path.exists(READELF_PATH):
  42         READELF_PATH = 'readelf'
  43
  44
  45 def discover_testfiles(rootdir):
  46     """ Discover test files in the given directory. Yield them one by one.
  47     """
  48     for filename in os.listdir(rootdir):
  49         _, ext = os.path.splitext(filename)
  50         if ext == '.elf':
  51             yield os.path.join(rootdir, filename)
  52
  53
  54 def run_test_on_file(filename, verbose=False, opt=None):
  55     """ Runs a test on the given input filename. Return True if all test
  56         runs succeeded.
  57         If opt is specified, rather that going over the whole
  58         set of supported readelf options, the test will only
  59         run for one option.
  60     """
  61     success = True
  62     testlog.info("Test file '%s'" % filename)
  63     if opt is None:
  64         options = [
  65             '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V',
  66             '--debug-dump=info', '--debug-dump=decodedline',
  67             '--debug-dump=frames', '--debug-dump=frames-interp',
  68             '--debug-dump=aranges', '--debug-dump=pubtypes',
  69             '--debug-dump=pubnames', '--debug-dump=loc'
  70             ]
  71     else:
  72         options = [opt]
  73
  74     for option in options:
  75         if verbose: testlog.info("..option='%s'" % option)
  76
  77         # TODO(zlobober): this is a dirty hack to make tests work for ELF core
  78         # dump notes. Making it work properly requires a pretty deep
  79         # investigation of how original readelf formats the output.
  80         if "core" in filename and option == "-n":
  81             if verbose:
  82                 testlog.warning("....will fail because corresponding part of readelf.py is not implemented yet")
  83                 testlog.info('.......................SKIPPED')
  84             continue
  85
  86         # sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
  87         # patched from 0x07 0x10 to 00 00.
  88         # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
  89         # from "DW_CFA_undefined 16" to two NOPs.
  90         # GNU readelf had a bug here, had to work around. See PR #411.
  91
  92         # stdouts will be a 2-element list: output of readelf and output
  93         # of scripts/readelf.py
  94         stdouts = []
  95         for exe_path in [READELF_PATH, 'scripts/readelf.py']:
  96             args = [option, filename]
  97             if verbose: testlog.info("....executing: '%s %s'" % (
  98                 exe_path, ' '.join(args)))
  99             t1 = time.time()
 100             rc, stdout = run_exe(exe_path, args)
 101             if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
 102             if rc != 0:
 103                 testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc))
 104                 return False
 105             stdouts.append(stdout)
 106         if verbose: testlog.info('....comparing output...')
 107         t1 = time.time()
 108         rc, errmsg = compare_output(*stdouts)
 109         if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
 110         if rc:
 111             if verbose: testlog.info('.......................SUCCESS')
 112         else:
 113             success = False
 114             testlog.info('.......................FAIL')
 115             testlog.info('....for file %s' % filename)
 116             testlog.info('....for option "%s"' % option)
 117             testlog.info('....Output #1 is readelf, Output #2 is pyelftools')
 118             testlog.info('@@ ' + errmsg)
 119             dump_output_to_temp_files(testlog, *stdouts)
 120     return success
 121
 122
 123 def compare_output(s1, s2):
 124     """ Compare stdout strings s1 and s2.
 125         s1 is from readelf, s2 from elftools readelf.py
 126         Return pair success, errmsg. If comparison succeeds, success is True
 127         and errmsg is empty. Otherwise success is False and errmsg holds a
 128         description of the mismatch.
 129
 130         Note: this function contains some rather horrible hacks to ignore
 131         differences which are not important for the verification of pyelftools.
 132         This is due to some intricacies of binutils's readelf which pyelftools
 133         doesn't currently implement, features that binutils doesn't support,
 134         or silly inconsistencies in the output of readelf, which I was reluctant
 135         to replicate. Read the documentation for more details.
 136     """
 137     def prepare_lines(s):
 138         return [line for line in s.lower().splitlines() if line.strip() != '']
 139
 140     lines1 = prepare_lines(s1)
 141     lines2 = prepare_lines(s2)
 142
 143     flag_in_debug_line_section = False
 144
 145     if len(lines1) != len(lines2):
 146         return False, 'Number of lines different: %s vs %s' % (
 147                 len(lines1), len(lines2))
 148
 149     # Position of the View column in the output file, if parsing readelf..decodedline
 150     # output, and the GNU readelf output contains the View column. Otherwise stays -1.
 151     view_col_position = -1
 152     for i in range(len(lines1)):
 153         if lines1[i].endswith('debug_line section:'):
 154             # .debug_line or .zdebug_line
 155             flag_in_debug_line_section = True
 156
 157         # readelf spelling error for GNU property notes
 158         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
 159
 160         # The view column position may change from CU to CU:
 161         if view_col_position >= 0 and lines1[i].startswith('cu:'):
 162             view_col_position = -1
 163
 164         # Check if readelf..decodedline output line contains the view column
 165         if flag_in_debug_line_section and lines1[i].startswith('file name') and view_col_position < 0:
 166             view_col_position = lines1[i].find("view")
 167             stmt_col_position = lines1[i].find("stmt")
 168
 169         # Excise the View column from the table, if any.
 170         # View_col_position is only set to a nonzero number if one of the previous
 171         # lines was a table header line with a "view" in it.
 172         # We assume careful formatting on GNU readelf's part - View column values
 173         # are not out of line with the View header.
 174         if view_col_position >= 0 and not lines1[i].endswith(':'):
 175             lines1[i] = lines1[i][:view_col_position] + lines1[i][stmt_col_position:]
 176
 177         # Compare ignoring whitespace
 178         lines1_parts = lines1[i].split()
 179         lines2_parts = lines2[i].split()
 180
 181         if ''.join(lines1_parts) != ''.join(lines2_parts):
 182             ok = False
 183
 184             try:
 185                 # Ignore difference in precision of hex representation in the
 186                 # last part (i.e. 008f3b vs 8f3b)
 187                 if (''.join(lines1_parts[:-1]) == ''.join(lines2_parts[:-1]) and
 188                     int(lines1_parts[-1], 16) == int(lines2_parts[-1], 16)):
 189                     ok = True
 190             except ValueError:
 191                 pass
 192
 193             sm = SequenceMatcher()
 194             sm.set_seqs(lines1[i], lines2[i])
 195             changes = sm.get_opcodes()
 196             if '[...]' in lines1[i]:
 197                 # Special case truncations with ellipsis like these:
 198                 #     .note.gnu.bu[...]        redelf
 199                 #     .note.gnu.build-i        pyelftools
 200                 # Or more complex for symbols with versions, like these:
 201                 #     _unw[...]@gcc_3.0        readelf
 202                 #     _unwind_resume@gcc_3.0   pyelftools
 203                 for p1, p2 in zip(lines1_parts, lines2_parts):
 204                     dots_start = p1.find('[...]')
 205                     if dots_start != -1:
 206                         break
 207                 ok = p1.endswith('[...]') and p1[:dots_start] == p2[:dots_start]
 208                 if not ok:
 209                     dots_end = dots_start + 5
 210                     if len(p1) > dots_end and p1[dots_end] == '@':
 211                         ok = (    p1[:dots_start] == p2[:dots_start]
 212                               and p1[p1.rfind('@'):] == p2[p2.rfind('@'):])
 213             elif 'at_const_value' in lines1[i]:
 214                 # On 32-bit machines, readelf doesn't correctly represent
 215                 # some boundary LEB128 numbers
 216                 val = lines2_parts[-1]
 217                 num2 = int(val, 16 if val.startswith('0x') else 10)
 218                 if num2 <= -2**31 and '32' in platform.architecture()[0]:
 219                     ok = True
 220             elif 'os/abi' in lines1[i]:
 221                 if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]:
 222                     ok = True
 223             elif len(lines1_parts) == 3 and lines1_parts[2] == 'nt_gnu_property_type_0':
 224                 # readelf does not seem to print a readable description for this
 225                 ok = lines1_parts == lines2_parts[:3]
 226             else:
 227                 for s in ('t (tls)', 'l (large)', 'd (mbind)'):
 228                     if s in lines1[i] or s in lines2[i]:
 229                         ok = True
 230                         break
 231             if not ok:
 232                 errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n (%r)' % (
 233                     i, lines1[i], lines2[i], changes)
 234                 return False, errmsg
 235     return True, ''
 236
 237
 238 def main():
 239     if not is_in_rootdir():
 240         testlog.error('Error: Please run me from the root dir of pyelftools!')
 241         return 1
 242
 243     argparser = argparse.ArgumentParser(
 244         usage='usage: %(prog)s [options] [file] [file] ...',
 245         prog='run_readelf_tests.py')
 246     argparser.add_argument('files', nargs='*', help='files to run tests on')
 247     argparser.add_argument(
 248         '--parallel', action='store_true',
 249         help='run tests in parallel; always runs all tests w/o verbose')
 250     argparser.add_argument('-V', '--verbose',
 251                            action='store_true', dest='verbose',
 252                            help='verbose output')
 253     argparser.add_argument(
 254         '-k', '--keep-going',
 255         action='store_true', dest='keep_going',
 256         help="Run all tests, don't stop at the first failure")
 257     argparser.add_argument('--opt',
 258         action='store', dest='opt', metavar='<readelf-option>',
 259         help= 'Limit the test one one readelf option.')
 260     args = argparser.parse_args()
 261
 262     if args.parallel:
 263         if args.verbose or args.keep_going == False:
 264             print('WARNING: parallel mode disables verbosity and always keeps going')
 265
 266     if args.verbose:
 267         testlog.info('Running in verbose mode')
 268         testlog.info('Python executable = %s' % sys.executable)
 269         testlog.info('readelf path = %s' % READELF_PATH)
 270         testlog.info('Given list of files: %s' % args.files)
 271
 272     # If file names are given as command-line arguments, only these files
 273     # are taken as inputs. Otherwise, autodiscovery is performed.
 274     if len(args.files) > 0:
 275         filenames = args.files
 276     else:
 277         filenames = sorted(discover_testfiles('test/testfiles_for_readelf'))
 278
 279     if len(filenames) > 1 and args.parallel:
 280         pool = Pool()
 281         results = pool.map(run_test_on_file, filenames)
 282         failures = results.count(False)
 283     else:
 284         failures = 0
 285         for filename in filenames:
 286             if not run_test_on_file(filename, args.verbose, args.opt):
 287                 failures += 1
 288                 if not args.keep_going:
 289                     break
 290
 291     if failures == 0:
 292         testlog.info('\nConclusion: SUCCESS')
 293         return 0
 294     elif args.keep_going:
 295         testlog.info('\nConclusion: FAIL ({}/{})'.format(
 296             failures, len(filenames)))
 297         return 1
 298     else:
 299         testlog.info('\nConclusion: FAIL')
 300         return 1
 301
 302
 303 if __name__ == '__main__':
 304     sys.exit(main())