lxo/532/comp16-v1-skel.py

   1 #! /bin/env python3
   2
   3 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
   4
   5 # It will look for insns that can be represented in compressed mode,
   6 # according to the encoding rules in the copcond dictionary below.
   7
   8 # Nothing is assumed as to the actual bit-encoding of the insns, this
   9 # is just to experiment with insn selection and get a quick feedback
  10 # loop for the encoding options in compressed mode.
  11
  12 # In this script, the computations of encoding modes and transitions
  13 # are those for attempt 1 encoding, that encompasses:
  14
  15 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
  16 # mode or return to 32-bit mode;
  17
  18 # - 16-bit insns in compressed mode, each with 2 bits devoted to
  19 # encoding one of the following possibilities:
  20
  21 # -- switch back to uncompressed mode at the next insn
  22
  23 # -- interpret the next insn in uncompressed mode, then return to
  24 # compressed mode
  25
  26 # -- remain in 16-bit mode for the next insn
  27
  28 # -- take the 16 bits that would be the next compressed insn as an
  29 # extension to the present 16-bit insn, and remain in 16-bit mode for
  30 # the subsequent 16-bits
  31
  32 # At (visible) entry points, mode is forced to return to uncompressed
  33 # mode.
  34
  35 # The entire code stream is printed, without any attempt to modify the
  36 # addresses that go along with or in them; we only insert markers for
  37 # the transition points, and for the compressed instructions.
  38
  39 # The really useful information is printed at the end: a summary of
  40 # transition and compressed-insn counts, and the achieved compression
  41 # rate.
  42
  43 import sys
  44 import re
  45
  46 insn = re.compile('\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')
  47
  48 opkind = re.compile('(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))|(?P<immediate>-?[0-9]+)|(?P<branch>[0-9a-f]+)(?: <.*>)?|(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)')
  49
  50 def mapop(op):
  51     match = opkind.fullmatch(op)
  52
  53     if match is None:
  54         op = ('other', op)
  55     elif match['reg'] is not None:
  56         op = (match['regkind'], int(match['regnum']), op)
  57     elif match['immediate'] is not None:
  58         op = ('imm', int (op).bit_length (), op)
  59     elif match['branch'] is not None:
  60         op = ('pcoff', (int (match['branch'], 16)
  61                         - int (addr, 16)).bit_length (), op, addr)
  62     elif match['offset'] is not None:
  63         op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
  64     else:
  65         raise "unrecognized operand kind"
  66
  67     return op
  68
  69 def opclass(mop):
  70     return mop[0]
  71 def regno(mop):
  72     if mop[0] in { 'r', 'fr', 'cr' }:
  73         return mop[1]
  74     else:
  75         raise "operand is not a register"
  76
  77 def immbits(mop):
  78     if mop[0] is 'imm':
  79         return mop[1]
  80     else:
  81         raise "operand is not an immediate"
  82
  83 # Following are predicates to be used in copcond, to tell the mode in
  84 # which opcode with ops as operands is to be represented
  85
  86 # Any occurrence of the opcode can be compressed.
  87 def anyops(opcode, ops):
  88     return 1
  89
  90 # Compress iff first and second operands are the same.
  91 def same01(opcode, ops):
  92     if ops[0] == ops[1]:
  93         return 1
  94     else:
  95         return 0
  96
  97 # Registers representable in a made-up 3-bit mapping.
  98 cregs2 = { 1, 2, 3, 4, 5, 6, 7, 31 }
  99
 100 # Return true iff mop is a regular register present in cregs2
 101 def bin2regs3(mop):
 102     return opclass(mop) is 'r' and regno(mop) in cregs2
 103
 104 # Return true iff mop is an immediate of at most 8 bits.
 105 def imm8(mop):
 106     return opclass(mop) is 'imm' and immbits(mop) <= 8
 107
 108 # Compress binary opcodes iff the first two operands (output and first
 109 # input operand) are registers representable in 3 bits in compressed
 110 # mode, and the immediate operand can be represented in 8 bits.
 111 def bin2regs3imm8(opcode, ops):
 112     if bin2regs3(ops[0]) and bin2regs3(ops[1]) and imm8(ops[2]):
 113         return 1
 114     else:
 115         return 0
 116
 117 # Map opcodes that might be compressed to a function that returns the
 118 # best potential encoding kind for the insn, per the numeric coding
 119 # below.
 120 copcond = {
 121
 122 }
 123
 124 # We have 4 kinds of insns:
 125
 126 # 0: uncompressed; leave input insn unchanged
 127 # 1: 16-bit compressed, only in compressed mode
 128 # 2: 16-bit extended by another 16-bit, only in compressed mode
 129 # 3: 10-bit compressed, may switch to compressed mode
 130
 131 # count[0:3] count the occurrences of the base kinds.
 132 # count[4] counts extra 10-bit nop-switches to compressed mode,
 133 # tentatively introduced before insns that can be 16-bit encoded.
 134 count = [0,0,0,0,0]
 135 # Default comments for the insn kinds above.  2 is always tentative.
 136 comments = ['', '\t; 16-bit', '\t; tentative 16+16-bit', '\t; 10-bit']
 137
 138 # cur stands for the insn kind that we read and processed in the
 139 # previous iteration of the loop, and prev is the one before it.  the
 140 # one we're processing in the current iteration will be stored in
 141 # next until we make it cur at the very end of the loop.
 142 prev = cur = 0
 143
 144 for line in sys.stdin:
 145     if line[-1] is '\n':
 146         line = line[:-1]
 147
 148     match = insn.fullmatch(line)
 149     if match is None:
 150         print(line)
 151         # Switch to uncompressed mode at function boundaries
 152         prev = prev2 = 0
 153         continue
 154
 155     addr = match['addr']
 156     opcode = match['opcode']
 157     operands = match['operands']
 158
 159     if opcode in copcond:
 160         next = copcond[opcode](opcode,
 161                                [mapop(op) for op in operands.split(',')])
 162     else:
 163         next = 0
 164
 165     comment = None
 166
 167     if cur is 0:
 168         if next is 0:
 169             True # Uncompressed mode for good.
 170         elif next is 1:
 171             # If cur was not a single uncompressed mode insn,
 172             # tentatively encode a 10-bit nop to enter compressed
 173             # mode, and then 16-bit.  It takes as much space as
 174             # encoding as 32-bit, but offers more possibilities for
 175             # subsequent compressed encodings.  A compressor proper
 176             # would have to go back and change the encoding
 177             # afterwards, but wé re just counting.
 178             if prev is not 1:
 179                 print('\t\th.nop\t\t; tentatively switch to compressed mode')
 180                 count[4] += 1
 181                 comment = 'tentatively compressed to 16-bit'
 182         elif next is 2:
 183             # We can use compressed encoding for next after an
 184             # uncompressed insn only if it's the single-insn
 185             # uncompressed mode slot.  For anything else, we're better
 186             # off using uncompressed encoding for next, since it makes
 187             # no sense to spend a 10-bit nop to switch to compressed
 188             # mode for a 16+16-bit insn.  If subsequent insns would
 189             # benefit from compressed encoding, we can switch then.
 190             if prev is not 1:
 191                 next = 0
 192                 comment = 'not worth a nop for 16+16-bit'
 193         elif next is 3:
 194             # If prev was 16-bit compressed, cur would be in the
 195             # single-insn uncompressed slot, so next could be encoded
 196             # as 16-bit, enabling another 1-insn uncompressed slot
 197             # after next that a 10-bit insn wouldn't, so make it so.
 198             if prev is 1:
 199                 next = 1
 200                 comment = '16-bit, could be 10-bit'
 201     elif cur is 1:
 202         # After a 16-bit insn, anything goes.  If it remains in 16-bit
 203         # mode, we can have 1 or 2 as next; if it returns to 32-bit
 204         # mode, we can have 0 or 3.  Using 1 instead of 3 makes room
 205         # for a subsequent single-insn compressed mode, so prefer
 206         # that.
 207         if next is 3:
 208             next = 1
 209             comment = '16-bit, could be 10-bit'
 210     elif cur is 2:
 211         # After a 16+16-bit insn, we can't switch directly to 32-bit
 212         # mode.  However, cur could have been encoded as 32-bit, since
 213         # any 16+16-bit insn can.  Indeed, we may have an arbitrary
 214         # long sequence of 16+16-bit insns before next, and if next
 215         # can only be encoded in 32-bit mode, we can "resolve" all
 216         # previous adjacent 16+16-bit insns to the corresponding
 217         # 32-bit insns in the encoding, and "adjust" the 16-bit or
 218         # 10-bit insn that enabled the potential 16+16-bit encoding to
 219         # switch to 32-bit mode then instead.
 220         if next is 0:
 221             prev = cur = 0
 222             comment = '32-bit, like tentative 16+16-bit insns above'
 223     elif cur is 3:
 224         # After a 10-bit insn, another insn that could be encoded as
 225         # 10-bit might as well be encoded as 16-bit, to make room for
 226         # a single-insn uncompressed insn afterwards.
 227         if next is 3:
 228             next = 1
 229             comment = '16-bit, could be 10-bit'
 230     else:
 231         raise "unknown mode for previous insn"
 232
 233     count[next] += 1
 234
 235     if comment is None:
 236         comment = comments[next]
 237     else:
 238         comment = '\t; ' + comment
 239
 240     print(line + comment)
 241
 242     prev = cur
 243     cur = next
 244
 245 transition_bytes = 2 * count[4]
 246 compressed_bytes = 2 * (count[1] + count[3])
 247 uncompressed_bytes = 4 * (count[0] + count[2])
 248 total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
 249 original_bytes = 2 * compressed_bytes + uncompressed_bytes
 250
 251 print()
 252 print('Summary')
 253 print('32-bit uncompressed instructions: %i' % count[0])
 254 print('16-bit compressed instructions: %i' % count[1])
 255 print('16+16-bit (tentative) compressed-mode instructions: %i' % count[2])
 256 print('10-bit compressed instructions: %i' % count[3])
 257 print('10-bit (tentative) mode-switching nops: %i' % count[4])
 258 print('Compressed size estimate: %i' % total_bytes)
 259 print('Original size: %i' % original_bytes)
 260 print('Compressed/original ratio: %f' % (total_bytes / original_bytes))