test/run_readelf_tests.py

   1 #!/usr/bin/env python
   2 #-------------------------------------------------------------------------------
   3 # test/run_readelf_tests.py
   4 #
   5 # Automatic test runner for elftools & readelf
   6 #
   7 # Eli Bendersky (eliben@gmail.com)
   8 # This code is in the public domain
   9 #-------------------------------------------------------------------------------
  10 import argparse
  11 from difflib import SequenceMatcher
  12 import logging
  13 from multiprocessing import Pool
  14 import os
  15 import platform
  16 import re
  17 import sys
  18 import time
  19
  20 from utils import run_exe, is_in_rootdir, dump_output_to_temp_files
  21
  22 # Make it possible to run this file from the root dir of pyelftools without
  23 # installing pyelftools; useful for CI testing, etc.
  24 sys.path[0:0] = ['.']
  25
  26 # Create a global logger object
  27 testlog = logging.getLogger('run_tests')
  28 testlog.setLevel(logging.DEBUG)
  29 testlog.addHandler(logging.StreamHandler(sys.stdout))
  30
  31 # Set the path for calling readelf. We carry our own version of readelf around,
  32 # because binutils tend to change its output even between daily builds of the
  33 # same minor release and keeping track is a headache.
  34 if platform.system() == "Darwin": # MacOS
  35     READELF_PATH = 'greadelf'
  36 elif platform.system() == "Windows":
  37     # Point the environment variable READELF at Cygwin's readelf.exe, or some other Windows build
  38     READELF_PATH = os.environ.get('READELF', "readelf.exe")
  39 else:
  40     READELF_PATH = 'test/external_tools/readelf'
  41     if not os.path.exists(READELF_PATH):
  42         READELF_PATH = 'readelf'
  43
  44
  45 def discover_testfiles(rootdir):
  46     """ Discover test files in the given directory. Yield them one by one.
  47     """
  48     for filename in os.listdir(rootdir):
  49         _, ext = os.path.splitext(filename)
  50         if ext == '.elf':
  51             yield os.path.join(rootdir, filename)
  52
  53
  54 def run_test_on_file(filename, verbose=False, opt=None):
  55     """ Runs a test on the given input filename. Return True if all test
  56         runs succeeded.
  57         If opt is specified, rather that going over the whole
  58         set of supported readelf options, the test will only
  59         run for one option.
  60     """
  61     success = True
  62     testlog.info("Test file '%s'" % filename)
  63     if opt is None:
  64         options = [
  65             '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V',
  66             '--debug-dump=info', '--debug-dump=decodedline',
  67             '--debug-dump=frames', '--debug-dump=frames-interp',
  68             '--debug-dump=aranges', '--debug-dump=pubtypes',
  69             '--debug-dump=pubnames', '--debug-dump=loc',
  70             '--debug-dump=Ranges'
  71             ]
  72     else:
  73         options = [opt]
  74
  75     # TODO(sevaa): excluding two files from the --debug-dump=Ranges test until the maintainers
  76     # of GNU binutils fix https://sourceware.org/bugzilla/show_bug.cgi?id=30781
  77     if filename.endswith('dwarf_test_versions_mix.elf') or filename.endswith('dwarf_v5ops.so.elf'):
  78         options.remove('--debug-dump=Ranges')
  79
  80     for option in options:
  81         if verbose: testlog.info("..option='%s'" % option)
  82
  83         # TODO(zlobober): this is a dirty hack to make tests work for ELF core
  84         # dump notes. Making it work properly requires a pretty deep
  85         # investigation of how original readelf formats the output.
  86         if "core" in filename and option == "-n":
  87             if verbose:
  88                 testlog.warning("....will fail because corresponding part of readelf.py is not implemented yet")
  89                 testlog.info('.......................SKIPPED')
  90             continue
  91
  92         # sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
  93         # patched from 0x07 0x10 to 00 00.
  94         # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
  95         # from "DW_CFA_undefined 16" to two NOPs.
  96         # GNU readelf 2.38 had a bug here, had to work around:
  97         # https://sourceware.org/bugzilla/show_bug.cgi?id=29250
  98         # It's been fixed in the binutils' master since, but the latest master will break a lot.
  99         # Same patch in  dwarf_test_versions_mix.elf at 0x2061: 07 10 -> 00 00
 100
 101         # stdouts will be a 2-element list: output of readelf and output
 102         # of scripts/readelf.py
 103         stdouts = []
 104         for exe_path in [READELF_PATH, 'scripts/readelf.py']:
 105             args = [option, filename]
 106             if verbose: testlog.info("....executing: '%s %s'" % (
 107                 exe_path, ' '.join(args)))
 108             t1 = time.time()
 109             rc, stdout = run_exe(exe_path, args)
 110             if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
 111             if rc != 0:
 112                 testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc))
 113                 return False
 114             stdouts.append(stdout)
 115         if verbose: testlog.info('....comparing output...')
 116         t1 = time.time()
 117         rc, errmsg = compare_output(*stdouts)
 118         if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
 119         if rc:
 120             if verbose: testlog.info('.......................SUCCESS')
 121         else:
 122             success = False
 123             testlog.info('.......................FAIL')
 124             testlog.info('....for file %s' % filename)
 125             testlog.info('....for option "%s"' % option)
 126             testlog.info('....Output #1 is readelf, Output #2 is pyelftools')
 127             testlog.info('@@ ' + errmsg)
 128             dump_output_to_temp_files(testlog, filename, option, *stdouts)
 129     return success
 130
 131
 132 def compare_output(s1, s2):
 133     """ Compare stdout strings s1 and s2.
 134         s1 is from readelf, s2 from elftools readelf.py
 135         Return pair success, errmsg. If comparison succeeds, success is True
 136         and errmsg is empty. Otherwise success is False and errmsg holds a
 137         description of the mismatch.
 138
 139         Note: this function contains some rather horrible hacks to ignore
 140         differences which are not important for the verification of pyelftools.
 141         This is due to some intricacies of binutils's readelf which pyelftools
 142         doesn't currently implement, features that binutils doesn't support,
 143         or silly inconsistencies in the output of readelf, which I was reluctant
 144         to replicate. Read the documentation for more details.
 145     """
 146     def prepare_lines(s):
 147         return [line for line in s.lower().splitlines() if line.strip() != '']
 148
 149     lines1 = prepare_lines(s1)
 150     lines2 = prepare_lines(s2)
 151
 152     flag_in_debug_line_section = False
 153
 154     if len(lines1) != len(lines2):
 155         return False, 'Number of lines different: %s vs %s' % (
 156                 len(lines1), len(lines2))
 157
 158     # Position of the View column in the output file, if parsing readelf..decodedline
 159     # output, and the GNU readelf output contains the View column. Otherwise stays -1.
 160     view_col_position = -1
 161     for i in range(len(lines1)):
 162         if lines1[i].endswith('debug_line section:'):
 163             # .debug_line or .zdebug_line
 164             flag_in_debug_line_section = True
 165
 166         # readelf spelling error for GNU property notes
 167         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
 168
 169         # The view column position may change from CU to CU:
 170         if view_col_position >= 0 and lines1[i].startswith('cu:'):
 171             view_col_position = -1
 172
 173         # Check if readelf..decodedline output line contains the view column
 174         if flag_in_debug_line_section and lines1[i].startswith('file name') and view_col_position < 0:
 175             view_col_position = lines1[i].find("view")
 176             stmt_col_position = lines1[i].find("stmt")
 177
 178         # Excise the View column from the table, if any.
 179         # View_col_position is only set to a nonzero number if one of the previous
 180         # lines was a table header line with a "view" in it.
 181         # We assume careful formatting on GNU readelf's part - View column values
 182         # are not out of line with the View header.
 183         if view_col_position >= 0 and not lines1[i].endswith(':'):
 184             lines1[i] = lines1[i][:view_col_position] + lines1[i][stmt_col_position:]
 185
 186         # Compare ignoring whitespace
 187         lines1_parts = lines1[i].split()
 188         lines2_parts = lines2[i].split()
 189
 190         if ''.join(lines1_parts) != ''.join(lines2_parts):
 191             ok = False
 192
 193             try:
 194                 # Ignore difference in precision of hex representation in the
 195                 # last part (i.e. 008f3b vs 8f3b)
 196                 if (''.join(lines1_parts[:-1]) == ''.join(lines2_parts[:-1]) and
 197                     int(lines1_parts[-1], 16) == int(lines2_parts[-1], 16)):
 198                     ok = True
 199             except ValueError:
 200                 pass
 201
 202             sm = SequenceMatcher()
 203             sm.set_seqs(lines1[i], lines2[i])
 204             changes = sm.get_opcodes()
 205             if '[...]' in lines1[i]:
 206                 # Special case truncations with ellipsis like these:
 207                 #     .note.gnu.bu[...]        redelf
 208                 #     .note.gnu.build-i        pyelftools
 209                 # Or more complex for symbols with versions, like these:
 210                 #     _unw[...]@gcc_3.0        readelf
 211                 #     _unwind_resume@gcc_3.0   pyelftools
 212                 for p1, p2 in zip(lines1_parts, lines2_parts):
 213                     dots_start = p1.find('[...]')
 214                     if dots_start != -1:
 215                         break
 216                 ok = p1.endswith('[...]') and p1[:dots_start] == p2[:dots_start]
 217                 if not ok:
 218                     dots_end = dots_start + 5
 219                     if len(p1) > dots_end and p1[dots_end] == '@':
 220                         ok = (    p1[:dots_start] == p2[:dots_start]
 221                               and p1[p1.rfind('@'):] == p2[p2.rfind('@'):])
 222             elif 'at_const_value' in lines1[i]:
 223                 # On 32-bit machines, readelf doesn't correctly represent
 224                 # some boundary LEB128 numbers
 225                 val = lines2_parts[-1]
 226                 num2 = int(val, 16 if val.startswith('0x') else 10)
 227                 if num2 <= -2**31 and '32' in platform.architecture()[0]:
 228                     ok = True
 229             elif 'os/abi' in lines1[i]:
 230                 if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]:
 231                     ok = True
 232             elif len(lines1_parts) == 3 and lines1_parts[2] == 'nt_gnu_property_type_0':
 233                 # readelf does not seem to print a readable description for this
 234                 ok = lines1_parts == lines2_parts[:3]
 235             else:
 236                 for s in ('t (tls)', 'l (large)', 'd (mbind)'):
 237                     if s in lines1[i] or s in lines2[i]:
 238                         ok = True
 239                         break
 240             if not ok:
 241                 errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n (%r)' % (
 242                     i, lines1[i], lines2[i], changes)
 243                 return False, errmsg
 244     return True, ''
 245
 246
 247 def main():
 248     if not is_in_rootdir():
 249         testlog.error('Error: Please run me from the root dir of pyelftools!')
 250         return 1
 251
 252     argparser = argparse.ArgumentParser(
 253         usage='usage: %(prog)s [options] [file] [file] ...',
 254         prog='run_readelf_tests.py')
 255     argparser.add_argument('files', nargs='*', help='files to run tests on')
 256     argparser.add_argument(
 257         '--parallel', action='store_true',
 258         help='run tests in parallel; always runs all tests w/o verbose')
 259     argparser.add_argument('-V', '--verbose',
 260                            action='store_true', dest='verbose',
 261                            help='verbose output')
 262     argparser.add_argument(
 263         '-k', '--keep-going',
 264         action='store_true', dest='keep_going',
 265         help="Run all tests, don't stop at the first failure")
 266     argparser.add_argument('--opt',
 267         action='store', dest='opt', metavar='<readelf-option>',
 268         help= 'Limit the test one one readelf option.')
 269     args = argparser.parse_args()
 270
 271     if args.parallel:
 272         if args.verbose or args.keep_going == False:
 273             print('WARNING: parallel mode disables verbosity and always keeps going')
 274
 275     if args.verbose:
 276         testlog.info('Running in verbose mode')
 277         testlog.info('Python executable = %s' % sys.executable)
 278         testlog.info('readelf path = %s' % READELF_PATH)
 279         testlog.info('Given list of files: %s' % args.files)
 280
 281     # If file names are given as command-line arguments, only these files
 282     # are taken as inputs. Otherwise, autodiscovery is performed.
 283     if len(args.files) > 0:
 284         filenames = args.files
 285     else:
 286         filenames = sorted(discover_testfiles('test/testfiles_for_readelf'))
 287
 288     if len(filenames) > 1 and args.parallel:
 289         pool = Pool()
 290         results = pool.map(run_test_on_file, filenames)
 291         failures = results.count(False)
 292     else:
 293         failures = 0
 294         for filename in filenames:
 295             if not run_test_on_file(filename, args.verbose, args.opt):
 296                 failures += 1
 297                 if not args.keep_going:
 298                     break
 299
 300     if failures == 0:
 301         testlog.info('\nConclusion: SUCCESS')
 302         return 0
 303     elif args.keep_going:
 304         testlog.info('\nConclusion: FAIL ({}/{})'.format(
 305             failures, len(filenames)))
 306         return 1
 307     else:
 308         testlog.info('\nConclusion: FAIL')
 309         return 1
 310
 311
 312 if __name__ == '__main__':
 313     sys.exit(main())