openpower/sv/comp16-v1-skel.py

   1 #! /bin/env python3
   2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
   3
   4 # Estimate ppc code compression with Libre-SOC encoding attempt v1.
   5
   6
   7 # Copyright 2020 Alexandre Oliva
   8
   9 # This script is free software; you can redistribute it and/or modify
  10 # it under the terms of the GNU General Public License as published by
  11 # the Free Software Foundation; either version 3, or (at your option)
  12 # any later version.
  13
  14 # This script is distributed in the hope that it will be useful, but
  15 # WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # General Public License for more details.
  18
  19 # You should have received a copy of the GNU General Public License
  20 # along with this script; see the file COPYING3.  If not see
  21 # <http://www.gnu.org/licenses/>.
  22
  23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
  24
  25
  26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
  27
  28 # It will look for insns that can be represented in compressed mode,
  29 # according to the encoding rules in the copcond dictionary below.
  30
  31 # Nothing is assumed as to the actual bit-encoding of the insns, this
  32 # is just to experiment with insn selection and get a quick feedback
  33 # loop for the encoding options in compressed mode.
  34
  35 # In this script, the computations of encoding modes and transitions
  36 # are those for attempt 1 encoding, that encompasses:
  37
  38 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
  39 # mode or return to 32-bit mode;
  40
  41 # - 16-bit insns in compressed mode, each with 2 bits devoted to
  42 # encoding one of the following possibilities:
  43
  44 # -- switch back to uncompressed mode at the next insn
  45
  46 # -- interpret the next insn in uncompressed mode, then return to
  47 # compressed mode
  48
  49 # -- remain in 16-bit mode for the next insn
  50
  51 # - a 16-bit immediate insn in compressed mode, that must be followed
  52 # by another compressed insn
  53
  54 # At (visible) entry points, mode is forced to return to uncompressed
  55 # mode.  Every branch target must be in uncompressed mode as well, but
  56 # this script does not enforce that.
  57
  58 # The entire code stream is printed, without any attempt to modify the
  59 # addresses that go along with or in them; we only insert markers for
  60 # the transition points, and for the compressed instructions.
  61
  62 # The really useful information is printed at the end: a summary of
  63 # transition and compressed-insn counts, and the achieved compression
  64 # rate.
  65
  66 import sys
  67 import re
  68
  69 insn = re.compile('\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')
  70
  71 # reg is a regkind (r, cr, fr) followed by a regnum
  72 xreg = '(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'
  73
  74 # immediate is a sequence of digits, possibly preceded by a negative sign
  75 ximm = '(?P<immediate>-?[0-9]+)'
  76
  77 # branch is a branch target address; ignore an angle-bracketed label after it
  78 xbrt = '(?P<branch>[0-9a-f]+)(?: <.*>)?'
  79
  80 # offset is like immediate, but followed by a parenthesized basereg
  81 xoff = '(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'
  82
  83 # Combine the above into alternatives, to easily classify operands by
  84 # pattern matching.
  85 opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff]))
  86
  87 # Pre-parse and classify op into a mop, short for mapped op.
  88 def mapop(op):
  89     match = opkind.fullmatch(op)
  90
  91     if match is None:
  92         op = ('other', op)
  93     elif match['reg'] is not None:
  94         op = (match['regkind'], int(match['regnum']), op)
  95     elif match['immediate'] is not None:
  96         op = ('imm', int (op).bit_length (), op)
  97     elif match['branch'] is not None:
  98         op = ('pcoff', (int (match['branch'], 16)
  99                         - int (addr, 16)).bit_length (), op, addr)
 100     elif match['offset'] is not None:
 101         op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
 102     # FIXME: cr exprs not handled
 103     else:
 104         raise "unrecognized operand kind"
 105
 106     return op
 107
 108 # Accessor to enable the mapop representation to change easily.
 109 def opclass(mop):
 110     return mop[0]
 111
 112 # Some opclass predicates, for the same reason.
 113 def regp(mop):
 114     return opclass(mop) in { 'r', 'fr', 'cr' } \
 115         or (opclass(mop) is  'imm' and mop[1] is 0)
 116 def immp(mop):
 117     return opclass(mop) in { 'imm', 'pcoff' }
 118 def rofp(mop):
 119     return opclass(mop) is   'ofst'
 120
 121 # Some more accessors.
 122
 123 # Return the reg number if mop fits regp.
 124 def regno(mop):
 125     if regp(mop) \
 126        or (immp(mop) and mop[1] is 0):
 127         return mop[1]
 128     raise "operand is not a register"
 129
 130 def immval(mop):
 131     if immp(mop):
 132         return int(mop[2])
 133     raise "operand is not an immediate"
 134
 135 # Return the immediate length if mop fits immp.
 136 def immbits(mop):
 137     if immp(mop):
 138         return mop[1]
 139     raise "operand is not an immediate"
 140
 141 # Return the register sub-mop if mop fits rofp.
 142 def rofreg(mop):
 143     if rofp(mop):
 144         return mop[2]
 145     raise "operand is not an offset"
 146
 147 # Return the offset sub-opt if mop fits rofp.
 148 def rofset(mop):
 149     if rofp(mop):
 150         return mop[1]
 151     raise "operand is not an offset"
 152
 153 # Following are predicates to be used in copcond, to tell the mode in
 154 # which opcode with ops as operands is to be represented.
 155
 156 # TODO: use insn_histogram.py to show the best targets
 157 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
 158 # Registers representable in a made-up 3-bit mapping.
 159 # It must contain 0 for proper working of at least storex.
 160 cregs3 = { 0, 31, 1, 2, 3, 4, 5, 6, 7 }
 161 # Ditto in a 2-bit mapping.  It needs not contain 0, but it must be a
 162 # subset of cregs3 for proper working of at least storex.
 163 cregs2 = { 2, 3, 4, 5 }
 164 # Use the same sets for FP for now.
 165 cfregs3 = cregs3
 166 cfregs2 = cregs2
 167
 168 # Return true iff mop is a regular register present in cregs2
 169 def rcregs2(mop):
 170     return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs2
 171
 172 # Return true iff mop is a regular register present in cregs3
 173 def rcregs3(mop):
 174     return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs3
 175
 176 # Return true iff mop is a floating-point register present in cfregs2
 177 def rcfregs2(mop):
 178     return opclass(mop) is 'fr' and regno(mop) in cfregs2
 179
 180 # Return true iff mop is a floating-point register present in cfregs3
 181 def rcfregs3(mop):
 182     return opclass(mop) is 'fr' and regno(mop) in cfregs3
 183
 184
 185 # Return true iff mop is an immediate of at most 8 bits.
 186 def imm8(mop):
 187     return immp(mop) and immbits(mop) <= 8
 188
 189 # Return true iff mop is an immediate of at most 12 bits.
 190 def imm12(mop):
 191     return immp(mop) and immbits(mop) <= 12
 192
 193 # Compress binary opcodes iff the first two operands (output and first
 194 # input operand) are registers representable in 3 bits in compressed
 195 # mode, and the immediate operand can be represented in 8 bits.
 196 def bin2regs3imm8(opcode, ops):
 197     if rcregs3(ops[0]) and rcregs3(ops[1]) and imm8(ops[2]):
 198         return 1
 199     return 0
 200
 201 # Recognize do-nothing insns, particularly ori r0,r0,0.
 202 def maybenop(opcode, ops):
 203     if opcode in ['ori', 'addi'] and regno(ops[0]) is regno(ops[1]) \
 204        and ops[0][0] is 'r' and regno(ops[0]) is 0 \
 205        and imm8(ops[2]) and immbits(ops[2]) is 0:
 206         return 3
 207     return 0
 208
 209 # Recognize an unconditional branch, that can be represented with a
# 6-bit operand in 10-bit mode, and an additional 4 bits in 16-bit
 211 # mode.  In both cases, the offset is shifted left by 2 bits.
 212 def uncondbranch(opcode, ops):
 213     if imm8(ops[0]):
 214         return 3
 215     if imm12(ops[0]):
 216         return 1
 217     return 0
 218
 219 # 2 bits for RT and RA.  RB is r0 in 10-bit, and 3 bits in 16-bit
 220 # ??? there's a general assumption that, if an insn can be represented
# in 10-bits, then it can also be represented in 16 bits.
 222 # This will not be the case if cregs3 can't represent register 0.
 223 def storexaddr(opcode, ops):
 224     # Canonicalize offset in ops[1] to reg, imm
 225     if rofp(ops[1]):
 226         ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
 227         # Require offset 0 for compression of non-indexed form.
 228         if not regp(ops[2]):
 229             return 0
 230     # If any of the registers is zero, and the other fits in cregs2,
 231     # it fits in 10-bit.
 232     if (rcregs2(ops[1]) and regno(ops[2]) is 0) \
 233        or (regno(ops[1]) is 0 and rcregs2(ops[2])):
 234         return 3
 235     # For 16-bit one must fit rcregs2 and the other rcregs3.
 236     if (rcregs2(ops[1]) and rcregs3(ops[2])) \
 237        or (rcregs3(ops[1]) and rcregs2(ops[2])):
 238         return 1
 239     return 0
 240 def rstorex(opcode, ops):
 241     if rcregs2(ops[0]):
 242         return storexaddr(opcode, ops)
 243     return 0
 244 def frstorex(opcode, ops):
 245     if rcfregs2(ops[0]):
 246         return storexaddr(opcode, ops)
 247     return 0
 248
 249 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 250 # RB and RT must match.  ??? It's not clear what that means WRT
 251 # register mapping of different kinds of registers, e.g. when RT is a
 252 # floating-point register..
 253 def loadxaddr(opcode, ops):
 254     if rofp(ops[1]):
 255         ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
 256         # Require offset 0 for compression of non-indexed form.
 257         if not regp(ops[2]):
 258             return 0
 259     if rcregs3(ops[1]) and rcregs3(ops[2]):
 260         if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }:
 261             return 3
 262         return 1
 263     return 0
 264 def rloadx(opcode, ops):
 265     if rcregs3(ops[0]):
 266         return loadxaddr(opcode, ops)
 267     return 0
 268 def frloadx(opcode, ops):
 269     if rcfregs3(ops[0]):
 270         return loadxaddr(opcode, ops)
 271     return 0
 272
 273 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 274 # RB and RT must match.  RA must not be zero, but in 16-bit mode we
 275 # can swap RA and RB to make it fit.
 276 def addop(opcode, ops):
 277     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 278         if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }:
 279             return 3
 280         if regno(ops[1]) is not 0 or regno(ops[2]) is not 0:
 281             return 1
 282     return 0
 283
 284 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 285 # RA and RT must match.  ??? The spec says RB, but the actual opcode
 286 # is subf., subtract from, and it subtracts RA from RB.  'neg.' would
 287 # make no sense as described there if we didn't use RA.
 288 def subfop(opcode, ops):
 289     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 290         if regno(ops[0]) is regno(ops[1]):
 291             return 3
 292         return 1
 293     return 0
 294 def negop(opcode, ops):
 295     if rcregs3(ops[0]) and rcregs3(ops[1]):
 296         return 3
 297     return 0
 298
 299 # 3 bits for RA and 3 bits for RB.  L (op1) must be 1 for 10-bit.
 300 # op0 is a cr, must be zero for 10-bit.
 301 def cmpop(opcode, ops):
 302     if rcregs3(ops[2]) and rcregs3(ops[3]):
 303         if regno(ops[0]) is 0 and immval(ops[1]) is 1:
 304             return 3
 305         return 1
 306     return 0
 307
 308 # 3 bits for RS, 3 bits for RB, 3 bits for RS, 16-bit only.
 309 def sldop(opcode, ops):
 310     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 311         return 1
 312     return 0
 313 # same as sld, except RS must be nonzero.
 314 def srdop(opcode, ops):
 315     if regno(ops[1]) is not 0:
 316         return sldop(opcode, ops)
 317     return 0
 318 # same as sld, except RS is given by RA, so they must be the same.
 319 def sradop(opcode, ops):
 320     if regno(ops[0]) is regno(ops[1]):
 321         return sldop(opcode, ops)
 322     return 0
 323
 324 # binary logical ops: and, nand, or, nor.
 325 # 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
 326 # RT is implicitly RB in 10-bit mode.
 327 def binlog1016ops(opcode, ops):
 328     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
 329        and regno(ops[1]) is not 0:
 330         # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
 331         # of the 16-bit nor; we've already ruled out r0 as RB above.
 332         if regno(ops[0]) is regno(ops[2]) and opcode is not 'nor':
 333             return 3
 334         # or and and, with two identical inputs, stand for mr.
 335         # nor and nand, likewise, stand for not, that has its
 336         # own unary 10-bit encoding.
 337         if regno(ops[1]) is regno(ops[2]):
 338             return 3
 339         return 1
 340     return 0
 341 # 3 bits for RB, 3 bits for RT in 16-bit mode.
 342 # RT is implicitly RB in 10-bit mode.
 343 def unlog1016ops(opcode, ops):
 344     if rcregs3(ops[0]) and rcregs3(ops[1]):
 345         if regno(ops[0]) is regno(ops[1]):
 346             return 3
 347         return 1
 348     return 0
 349 # 16-bit only logical ops; no 10-bit encoding available
 350 # same constraints as the 1016 ones above.
 351 def binlog16ops(opcode, ops):
 352     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
 353        and regno(ops[1]) is not 0:
 354         return 1
 355     return 0
 356 def unlog16ops(opcode, ops):
 357     if rcregs3(ops[0]) and rcregs3(ops[1]):
 358         return 1
 359     return 0
 360
 361 # binary floating-point ops
 362 # 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
 363 # FRT is implicitly FRB in 10-bit mode.
 364 def binfp1016ops(opcode, ops):
 365     if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
 366        and regno(ops[1]) is not 0:
 367         if regno(ops[0]) is regno(ops[2]):
 368             return 3
 369         return 1
 370     return 0
 371 def unfp1016ops(opcode, ops):
 372     if rcfregs3(ops[0]) and rcfregs3(ops[1]):
 373         if regno(ops[0]) is regno(ops[1]):
 374             return 3
 375         return 1
 376     return 0
 377 def binfp16ops(opcode, ops):
 378     if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
 379        and regno(ops[1]) is not 0:
 380         return 1
 381     return 0
 382 def unfp16ops(opcode, ops):
 383     if rcfregs3(ops[0]) and rcfregs3(ops[1]):
 384         return 1
 385     return 0
 386
 387 def cnvfp16ops(opcode, ops):
 388     if rcfregs2(ops[0]) and rcfregs2(ops[1]):
 389         return 1
 390     return 0
 391
 392 # Map opcodes that might be compressed to a function that returns the
 393 # best potential encoding kind for the insn, per the numeric coding
 394 # below.
 395 copcond = {
 396     'ori': maybenop, 'addi': maybenop,
 397     # 'attn': binutils won't ever print this
 398     'b': uncondbranch, 'bl': uncondbranch,
 399     # 'bc', 'bcl': only in 16-imm mode, not implemented yet
 400     # 'bclr', 'bclrl': available in 10- or 16-bit, not implemented yet
 401     # 16-imm opcodes not implemented yet.
 402     'stdx': rstorex, 'stwx': rstorex,
 403     'std': rstorex, 'stw': rstorex, # only offset zero
 404     'stfdx': frstorex, 'stfsx': frstorex,
 405     'stfd': frstorex, 'stfs': frstorex, # only offset zero
 406     # Assuming lwz* rather than lwa*.
 407     'ldx': rloadx, 'lwzx': rloadx,
 408     'ld': rloadx, 'lwz': rloadx, # only offset zero
 409     'lfdx': rloadx, 'lfsx': rloadx,
 410     'lfd': rloadx, 'lfs': rloadx, # only offset zero
 411     'add': addop,
 412     'subf.': subfop, 'neg.': negop,
 413     # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
 414     # cmpw is cmp with L=0, 16-bit only.
 415     'cmp': cmpop,
 416     'sld.': sldop, 'srd.': srdop, 'srad.': sradop,
 417     'and': binlog1016ops, 'nand': binlog1016ops,
 418     'or': binlog1016ops, 'nor': binlog1016ops,
 419     # assuming popcnt and cntlz mean the *d opcodes.
 420     'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops,
 421     # not RT, RB is mapped to nand/nor RT, RB, RB.
 422     'xor': binlog16ops, 'eqv': binlog16ops,
 423     # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
 424     # assuming cnttz mean the *d opcode.
 425     'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops,
 426     'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops,
 427     'fneg.': unfp1016ops,
 428     'fdiv': binfp16ops,
 429     'fabs.': unfp16ops, 'fmr.': unfp16ops,
 430     # ??? are these the intended fp2int and int2fp, for all
 431     # combinations of signed/unsigned float/double?
 432     'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops,
 433     'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops,
 434     'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops,
 435     'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops,
 436     # Not implemented yet:
 437     # 10- and 16-bit:
 438     # 'mcrf':
 439     # 'cbank':
 440     # 16-bit only:
 441     # 'crnor':
 442     # 'crandc':
 443     # 'crxor':
 444     # 'crnand':
 445     # 'crand':
 446     # 'creqv':
 447     # 'crorc':
 448     # 'cror':
 449     # 'mtlr':
 450     # 'mtctr':
 451     # 'mflr':
 452     # 'mfctr':
 453     # 'mtcr':
 454     # 'mfcr':
 455 }
 456
 457 # We have 4 kinds of insns:
 458
 459 # 0: uncompressed; leave input insn unchanged
 460 # 1: 16-bit compressed, only in compressed mode
 461 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
 462 # 3: 10-bit compressed, may switch to compressed mode
 463
 464 # count[0:3] count the occurrences of the base kinds.
 465 # count[4] counts extra 10-bit nop-switches to compressed mode,
 466 #   tentatively introduced before insns that can be 16-bit encoded.
 467 # count[5] counts extra 10-bit nop-switches to compressed mode,
 468 #   tentatively introduced before insns that can be 16-imm encoded.
 469 # count[6] counts extra 16-bit nop-switches back to uncompressed,
 470 #   introduced after a 16-imm insn.
 471 count = [0,0,0,0,0,0,0]
 472 # Default comments for the insn kinds above.
 473 comments = ['', '\t; 16-bit', '\t; 16-imm', '\t; 10-bit']
 474
 475 # curi stands for the insn kind that we read and processed in the
 476 # previous iteration of the loop, and previ is the one before it.  the
 477 # one we're processing in the current iteration will be stored in
 478 # nexti until we make it curi at the very end of the loop.
 479 previ = curi = 0
 480
 481 for line in sys.stdin:
 482     if line[-1] is '\n':
 483         line = line[:-1]
 484
 485     match = insn.fullmatch(line)
 486     if match is None:
 487         print(line)
 488         # Switch to uncompressed mode at function boundaries
 489         previ = curi = 0
 490         continue
 491
 492     addr = match['addr']
 493     opcode = match['opcode']
 494     operands = match['operands']
 495
 496     if opcode in copcond:
 497         nexti = copcond[opcode](opcode,
 498                                [mapop(op) for op in operands.split(',')])
 499     else:
 500         nexti = 0
 501
 502     comment = None
 503
 504     if curi is 0:
 505         if nexti is 0:
 506             True # Uncompressed mode for good.
 507         elif nexti is 1:
 508             # If curi was not a single uncompressed mode insn,
 509             # tentatively encode a 10-bit nop to enter compressed
 510             # mode, and then 16-bit.  It takes as much space as
 511             # encoding as 32-bit, but offers more possibilities for
 512             # subsequent compressed encodings.  A compressor proper
 513             # would have to go back and change the encoding
 514             # afterwards, but wé re just counting.
 515             if previ is not 1:
 516                 print('\t\th.nop\t\t; 10-bit (tentative)')
 517                 count[4] += 1
 518                 comment = '16-bit (tentative)'
 519             else:
 520                 comment = '16-bit auto-back'
 521         elif nexti is 2:
 522             # We can use compressed encoding for the 16-imm nexti
 523             # after an uncompressed insn without penalty if it's the
 524             # single-insn uncompressed mode slot.  For other
 525             # configurations, we can either remain in uncompressed
 526             # mode, or switch to compressed mode with a 10-bit nop.
 527             if previ is not 1:
 528                 print('\t\th.nop\t\t; 10-bit (tentative)')
 529                 count[5] += 1
 530                 comment = '16-imm (tentative), vs uncompressed'
 531             else:
 532                 comment = '16-imm auto-back'
 533         elif nexti is 3:
 534             # If previ was 16-bit compressed, curi would be in the
 535             # single-insn uncompressed slot, so nexti could be encoded
 536             # as 16-bit, enabling another 1-insn uncompressed slot
 537             # after nexti that a 10-bit insn wouldn't, so make it so.
 538             if previ is 1:
 539                 nexti = 1
 540                 comment = '16-bit auto-back, vs 10-bit'
 541     elif curi is 1:
 542         # After a 16-bit insn, anything goes.  If it remains in 16-bit
 543         # mode, we can have 1 or 2 as nexti; if it returns to 32-bit
 544         # mode, we can have 0 or 3.  Using 1 instead of 3 makes room
 545         # for a subsequent single-insn compressed mode, so prefer
 546         # that.
 547         if nexti is 3:
 548             nexti = 1
 549             comment = '16-bit, vs 10-bit'
 550     elif curi is 2:
 551         # After a 16-imm insn, we can only switch back to uncompressed
 552         # mode with a 16-bit nop.
 553         if nexti is 0:
 554             print('\t\tc.nop\t\t; forced switch back to uncompressed mode')
 555             count[6] += 1
 556             previ = curi
 557             curi = 1
 558         elif nexti is 3:
 559             nexti = 1
 560     elif curi is 3:
 561         # After a 10-bit insn, another insn that could be encoded as
 562         # 10-bit might as well be encoded as 16-bit, to make room for
 563         # a single-insn uncompressed insn afterwards.
 564         if nexti is 3:
 565             nexti = 1
 566             comment = '16-bit, vs 10-bit'
 567     else:
 568         raise "unknown mode for previious insn"
 569
 570     count[nexti] += 1
 571
 572     if comment is None:
 573         comment = comments[nexti]
 574     else:
 575         comment = '\t; ' + comment
 576
 577     print(line + comment)
 578
 579     previ = curi
 580     curi = nexti
 581
 582 transition_bytes = 2 * (count[4] + count[5] + count[6])
 583 compressed_bytes = 2 * (count[1] + count[3])
 584 uncompressed_bytes = 4 * (count[0] + count[2])
 585 total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
 586 original_bytes = 2 * compressed_bytes + uncompressed_bytes
 587
 588 print()
 589 print('Summary')
 590 print('32-bit uncompressed instructions: %i' % count[0])
 591 print('16-bit compressed instructions: %i' % count[1])
 592 print('16-imm compressed-mode instructions: %i' % count[2])
 593 print('10-bit compressed instructions: %i' % count[3])
 594 print('10-bit mode-switching nops: %i' % count[4])
 595 print('10-bit mode-switching nops for imm-16: %i' % count[5])
 596 print('16-bit mode-switching nops after imm-16: %i' % count[6])
 597 print('Compressed size estimate: %i' % total_bytes)
 598 print('Original size: %i' % original_bytes)
 599 print('Compressed/original ratio: %f' % (total_bytes / original_bytes))