comp16-v1-skel.py: fixed 16-imm logic, implemented rules for most opcodes
[libreriscv.git] / openpower / sv / comp16-v1-skel.py
1 #! /bin/env python3
2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
3
4 # Estimate ppc code compression with Libre-SOC encoding attempt v1.
5
6
7 # Copyright 2020 Alexandre Oliva
8
9 # This script is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3, or (at your option)
12 # any later version.
13
14 # This script is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
18
19 # You should have received a copy of the GNU General Public License
20 # along with this script; see the file COPYING3. If not see
21 # <http://www.gnu.org/licenses/>.
22
23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
24
25
26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
27
28 # It will look for insns that can be represented in compressed mode,
29 # according to the encoding rules in the copcond dictionary below.
30
31 # Nothing is assumed as to the actual bit-encoding of the insns, this
32 # is just to experiment with insn selection and get a quick feedback
33 # loop for the encoding options in compressed mode.
34
35 # In this script, the computations of encoding modes and transitions
36 # are those for attempt 1 encoding, that encompasses:
37
38 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
39 # mode or return to 32-bit mode;
40
41 # - 16-bit insns in compressed mode, each with 2 bits devoted to
42 # encoding one of the following possibilities:
43
44 # -- switch back to uncompressed mode at the next insn
45
46 # -- interpret the next insn in uncompressed mode, then return to
47 # compressed mode
48
49 # -- remain in 16-bit mode for the next insn
50
51 # - a 16-bit immediate insn in compressed mode, that must be followed
52 # by another compressed insn
53
54 # At (visible) entry points, mode is forced to return to uncompressed
55 # mode. Every branch target must be in uncompressed mode as well, but
56 # this script does not enforce that.
57
58 # The entire code stream is printed, without any attempt to modify the
59 # addresses that go along with or in them; we only insert markers for
60 # the transition points, and for the compressed instructions.
61
62 # The really useful information is printed at the end: a summary of
63 # transition and compressed-insn counts, and the achieved compression
64 # rate.
65
66 import sys
67 import re
68
# Matches one disassembled insn line: leading whitespace, a hex address
# followed by a colon, the opcode, and whatever operand text follows.
# Raw strings keep the regex escapes (\s, \() out of Python's own
# string-escape processing.
insn = re.compile(
    r'\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')

# reg is a regkind (r, cr, fr) followed by a regnum
xreg = r'(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'

# immediate is a sequence of digits, possibly preceded by a negative sign
ximm = r'(?P<immediate>-?[0-9]+)'

# branch is a branch target address; ignore an angle-bracketed label after it
xbrt = r'(?P<branch>[0-9a-f]+)(?: <.*>)?'

# offset is like immediate, but followed by a parenthesized basereg
xoff = r'(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'

# Combine the above into alternatives, to easily classify operands by
# pattern matching.  Alternatives are tried in the order listed.
opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff]))
86
def mapop(op):
    """Pre-parse and classify operand text op into a mop (mapped op).

    The result is a tuple whose first element is the operand class:

      ('other', op)                       nothing in opkind matched
      (regkind, regnum, op)               a register; regkind is the
                                          matched prefix, regnum an int
      ('imm', bits, op)                   bits is int(op).bit_length()
      ('pcoff', bits, op, addr)           bits is the bit_length() of the
                                          branch target minus addr
      ('ofst', offset_mop, base_mop, op)  offset(basereg), both parts
                                          mapped recursively

    Branch targets are measured against the module-level addr, which the
    main loop sets to the address of the insn being processed; it must
    be set before a branch operand is mapped.

    Raises ValueError if opkind matched but none of its groups is set.
    """
    match = opkind.fullmatch(op)

    if match is None:
        return ('other', op)
    if match['reg'] is not None:
        return (match['regkind'], int(match['regnum']), op)
    if match['immediate'] is not None:
        return ('imm', int(op).bit_length(), op)
    if match['branch'] is not None:
        distance = int(match['branch'], 16) - int(addr, 16)
        return ('pcoff', distance.bit_length(), op, addr)
    if match['offset'] is not None:
        return ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
    # FIXME: cr exprs not handled
    # Raising a plain string is itself a TypeError in Python 3, which
    # would hide the intended message.
    raise ValueError("unrecognized operand kind")
107
def opclass(mop):
    """Return the operand class, the leading element of mop.

    Going through this accessor lets the mapop representation change
    without touching its users.
    """
    kind = mop[0]
    return kind
111
# Some opclass predicates, for the same reason.
def regp(mop):
    """Return true iff mop can stand in a register slot.

    That is any of the r, fr and cr classes, or an immediate whose
    recorded bit length is zero (a literal 0).
    """
    kind = opclass(mop)
    # Compare by value: identity tests against str/int literals only
    # work by accident of interning and small-integer caching.
    return kind in {'r', 'fr', 'cr'} or (kind == 'imm' and mop[1] == 0)
def immp(mop):
    """Return true iff mop is a plain immediate or a pc-relative offset."""
    immediate_kinds = {'imm', 'pcoff'}
    return opclass(mop) in immediate_kinds
def rofp(mop):
    """Return true iff mop is an offset(basereg) operand."""
    # Value comparison; 'is' on a string literal relies on interning.
    return opclass(mop) == 'ofst'
120
# Some more accessors.

def regno(mop):
    """Return the register number of mop.

    Accepts anything regp accepts, plus any immp operand whose recorded
    bit length is zero; for those the result is that zero.

    Raises ValueError for any other operand.
    """
    if regp(mop) or (immp(mop) and mop[1] == 0):
        return mop[1]
    # A bare string cannot be raised in Python 3.
    raise ValueError("operand is not a register")
129
def immval(mop):
    """Return the integer value of an immp operand, parsed from its text.

    Raises ValueError if mop does not fit immp.
    """
    if immp(mop):
        # mop[2] holds the operand text as it appeared in the input.
        return int(mop[2])
    raise ValueError("operand is not an immediate")
134
def immbits(mop):
    """Return the immediate length recorded in mop, if mop fits immp.

    Raises ValueError otherwise.
    """
    if immp(mop):
        return mop[1]
    raise ValueError("operand is not an immediate")
140
def rofreg(mop):
    """Return the base-register sub-mop, if mop fits rofp.

    Raises ValueError otherwise.
    """
    if rofp(mop):
        return mop[2]
    raise ValueError("operand is not an offset")
146
def rofset(mop):
    """Return the offset sub-mop, if mop fits rofp.

    Raises ValueError otherwise.
    """
    if rofp(mop):
        return mop[1]
    raise ValueError("operand is not an offset")
152
153 # Following are predicates to be used in copcond, to tell the mode in
154 # which opcode with ops as operands is to be represented.
155
156 # TODO: use insn_histogram.py to show the best targets
157 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
# Registers representable in a made-up 3-bit mapping.
# It must contain 0 for proper working of at least storex.
# NOTE(review): this set has nine members, one more than three bits can
# select -- confirm which register is meant to be left out.
cregs3 = {0, 1, 2, 3, 4, 5, 6, 7, 31}
# Ditto in a 2-bit mapping.  It needs not contain 0, but it must be a
# subset of cregs3 for proper working of at least storex.
cregs2 = {2, 3, 4, 5}
# Use the same sets for FP for now.
cfregs3 = cregs3
cfregs2 = cregs2
167
def rcregs2(mop):
    """Return true iff mop is a regular register present in cregs2.

    Operands of class imm are let through to regno as well.
    """
    if opclass(mop) not in {'r', 'imm'}:
        return False
    return regno(mop) in cregs2
171
def rcregs3(mop):
    """Return true iff mop is a regular register present in cregs3.

    Operands of class imm are let through to regno as well.
    """
    if opclass(mop) not in {'r', 'imm'}:
        return False
    return regno(mop) in cregs3
175
def rcfregs2(mop):
    """Return true iff mop is a floating-point register present in cfregs2."""
    # The class string comes out of a regex match, so it is a fresh
    # object that is never identical to the 'fr' literal; compare by
    # value or this predicate can never hold.
    return opclass(mop) == 'fr' and regno(mop) in cfregs2
179
def rcfregs3(mop):
    """Return true iff mop is a floating-point register present in cfregs3."""
    # The class string comes out of a regex match, so it is a fresh
    # object that is never identical to the 'fr' literal; compare by
    # value or this predicate can never hold.
    return opclass(mop) == 'fr' and regno(mop) in cfregs3
183
184
def imm8(mop):
    """Return true iff mop is an immediate of at most 8 bits."""
    if not immp(mop):
        return False
    return immbits(mop) <= 8
188
def imm12(mop):
    """Return true iff mop is an immediate of at most 12 bits."""
    if not immp(mop):
        return False
    return immbits(mop) <= 12
192
def bin2regs3imm8(opcode, ops):
    """Classify a binary op taking two registers and an immediate.

    Returns 1 (16-bit) iff the first two operands (output and first
    input) are registers representable in 3 bits in compressed mode and
    the third operand is an immediate of at most 8 bits; 0 otherwise.
    """
    out_reg, in_reg, imm = ops[0], ops[1], ops[2]
    fits = rcregs3(out_reg) and rcregs3(in_reg) and imm8(imm)
    return 1 if fits else 0
200
def maybenop(opcode, ops):
    """Recognize do-nothing insns, particularly ori r0,r0,0.

    Returns 3 (10-bit) when opcode is ori or addi, both register
    operands are number 0, the first one is of class r, and the
    immediate has bit length 0; returns 0 otherwise.
    """
    if opcode not in ('ori', 'addi'):
        return 0
    target = regno(ops[0])
    # Value comparisons throughout: 'is' on ints and strings only works
    # by accident of CPython's caching.
    if target == regno(ops[1]) \
       and opclass(ops[0]) == 'r' and target == 0 \
       and imm8(ops[2]) and immbits(ops[2]) == 0:
        return 3
    return 0
208
def uncondbranch(opcode, ops):
    """Classify an unconditional branch by the size of its target operand.

    Such a branch can be represented with a 6-bit operand in 10-bit
    mode, and an additional 4 bits in 16-bit mode.  In both cases, the
    offset is shifted left by 2 bits.

    Returns 3 if the operand passes imm8, 1 if it passes imm12, else 0.
    """
    target = ops[0]
    if imm8(target):
        return 3
    return 1 if imm12(target) else 0
218
# 2 bits for RT and RA.  RB is r0 in 10-bit, and 3 bits in 16-bit
# ??? there's a general assumption that, if an insn can be represented
# in 10-bits, then it can also be represented in 16 bits.
# This will not be the case if cregs3 can't represent register 0.
def storexaddr(opcode, ops):
    """Classify the address operands of a store.

    ops is either (reg, offset(basereg)) or (reg, reg, reg); ops[0] is
    checked by the callers.  Returns 3 for a 10-bit candidate, 1 for a
    16-bit candidate, and 0 if the insn must stay uncompressed.
    """
    # Canonicalize offset in ops[1] to reg, imm
    if rofp(ops[1]):
        ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
        # Require offset 0 for compression of non-indexed form.
        if not regp(ops[2]):
            return 0
    first, second = ops[1], ops[2]
    # If any of the registers is zero, and the other fits in cregs2,
    # it fits in 10-bit.  Compare numbers by value, not identity.
    if (rcregs2(first) and regno(second) == 0) \
       or (regno(first) == 0 and rcregs2(second)):
        return 3
    # For 16-bit one must fit rcregs2 and the other rcregs3.
    if (rcregs2(first) and rcregs3(second)) \
       or (rcregs3(first) and rcregs2(second)):
        return 1
    return 0
def rstorex(opcode, ops):
    """Classify an integer store: the stored register must pass rcregs2,
    then the address operands decide via storexaddr."""
    if not rcregs2(ops[0]):
        return 0
    return storexaddr(opcode, ops)
def frstorex(opcode, ops):
    """Classify a floating-point store: the stored register must pass
    rcfregs2, then the address operands decide via storexaddr."""
    if not rcfregs2(ops[0]):
        return 0
    return storexaddr(opcode, ops)
248
# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
# RB and RT must match.  ??? It's not clear what that means WRT
# register mapping of different kinds of registers, e.g. when RT is a
# floating-point register..
def loadxaddr(opcode, ops):
    """Classify the address operands of a load.

    ops is either (reg, offset(basereg)) or (reg, reg, reg); ops[0] is
    checked by the callers.  Returns 3 when both address operands pass
    rcregs3 and the target's number equals one of theirs, 1 when they
    pass but the numbers differ, and 0 otherwise.
    """
    if rofp(ops[1]):
        based = ops[1]
        ops = (ops[0], rofreg(based), rofset(based))
        # Require offset 0 for compression of non-indexed form.
        if not regp(ops[2]):
            return 0
    if not (rcregs3(ops[1]) and rcregs3(ops[2])):
        return 0
    shares_target = regno(ops[0]) in {regno(ops[1]), regno(ops[2])}
    return 3 if shares_target else 1
def rloadx(opcode, ops):
    """Classify an integer load: the target must pass rcregs3, then the
    address operands decide via loadxaddr."""
    if not rcregs3(ops[0]):
        return 0
    return loadxaddr(opcode, ops)
def frloadx(opcode, ops):
    """Classify a floating-point load: the target must pass rcfregs3,
    then the address operands decide via loadxaddr."""
    if not rcfregs3(ops[0]):
        return 0
    return loadxaddr(opcode, ops)
272
# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
# RB and RT must match.  RA must not be zero, but in 16-bit mode we
# can swap RA and RB to make it fit.
def addop(opcode, ops):
    """Classify add.

    Returns 0 unless all three operands pass rcregs3.  Otherwise returns
    3 when the target's number equals either input's, 1 when at least
    one input is not register 0, and 0 when both inputs are register 0.
    """
    if not (rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2])):
        return 0
    rt, ra, rb = regno(ops[0]), regno(ops[1]), regno(ops[2])
    if rt in {ra, rb}:
        return 3
    # Integer comparison by value; 'is not 0' tests object identity.
    if ra != 0 or rb != 0:
        return 1
    return 0
283
# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
# RA and RT must match.  ??? The spec says RB, but the actual opcode
# is subf., subtract from, and it subtracts RA from RB.  'neg.' would
# make no sense as described there if we didn't use RA.
def subfop(opcode, ops):
    """Classify subf.

    Returns 0 unless all three operands pass rcregs3; otherwise 3 when
    the first two operands have the same register number, else 1.
    """
    if not (rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2])):
        return 0
    # Register numbers are ints; compare by value rather than identity.
    if regno(ops[0]) == regno(ops[1]):
        return 3
    return 1
def negop(opcode, ops):
    """Classify neg.: 3 (10-bit) when both operands pass rcregs3, else 0."""
    both_fit = rcregs3(ops[0]) and rcregs3(ops[1])
    return 3 if both_fit else 0
298
# 3 bits for RA and 3 bits for RB.  L (op1) must be 1 for 10-bit.
# op0 is a cr, must be zero for 10-bit.
def cmpop(opcode, ops):
    """Classify cmp, whose operands are (cr, L, RA, RB).

    Returns 0 unless RA and RB pass rcregs3; otherwise 3 when the cr
    number is 0 and L is 1, else 1.
    """
    if not (rcregs3(ops[2]) and rcregs3(ops[3])):
        return 0
    # Compare by value; identity tests on ints rely on caching.
    if regno(ops[0]) == 0 and immval(ops[1]) == 1:
        return 3
    return 1
307
# Three register operands of 3 bits each, 16-bit only.
def sldop(opcode, ops):
    """Classify sld.: 1 (16-bit) when all three operands pass rcregs3,
    else 0."""
    all_fit = rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2])
    return 1 if all_fit else 0
# same as sld, except RS must be nonzero.
def srdop(opcode, ops):
    """Classify srd.: as sldop, but 0 when the second operand is
    register number 0."""
    # Value comparison instead of 'is not 0'.
    if regno(ops[1]) != 0:
        return sldop(opcode, ops)
    return 0
# same as sld, except RS is given by RA, so they must be the same.
def sradop(opcode, ops):
    """Classify srad.: as sldop, but 0 unless the first two operands have
    the same register number."""
    # Value comparison instead of identity.
    if regno(ops[0]) == regno(ops[1]):
        return sldop(opcode, ops)
    return 0
323
# binary logical ops: and, nand, or, nor.
# 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
# RT is implicitly RB in 10-bit mode.
def binlog1016ops(opcode, ops):
    """Classify and/nand/or/nor.

    Returns 0 unless all three operands pass rcregs3 and the second is
    not register 0.  Otherwise returns 3 when the first and third
    operands match (except for nor) or when the second and third match,
    and 1 in the remaining cases.
    """
    if not (rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2])
            and regno(ops[1]) != 0):
        return 0
    # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
    # of the 16-bit nor; we've already ruled out r0 as RB above.
    # The opcode is a substring taken from the input line, so it must be
    # compared by value: "is not 'nor'" was true even for nor itself.
    if regno(ops[0]) == regno(ops[2]) and opcode != 'nor':
        return 3
    # or and and, with two identical inputs, stand for mr.
    # nor and nand, likewise, stand for not, that has its
    # own unary 10-bit encoding.
    if regno(ops[1]) == regno(ops[2]):
        return 3
    return 1
# 3 bits for RB, 3 bits for RT in 16-bit mode.
# RT is implicitly RB in 10-bit mode.
def unlog1016ops(opcode, ops):
    """Classify a unary logical op with 10- and 16-bit forms.

    Returns 0 unless both operands pass rcregs3; otherwise 3 when they
    have the same register number, else 1.
    """
    if not (rcregs3(ops[0]) and rcregs3(ops[1])):
        return 0
    # Value comparison instead of identity.
    if regno(ops[0]) == regno(ops[1]):
        return 3
    return 1
# 16-bit only logical ops; no 10-bit encoding available
# same constraints as the 1016 ones above.
def binlog16ops(opcode, ops):
    """Classify a 16-bit-only binary logical op.

    Returns 1 when all three operands pass rcregs3 and the second is not
    register 0; 0 otherwise.
    """
    # '!= 0' rather than 'is not 0': compare the number, not the object.
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
       and regno(ops[1]) != 0:
        return 1
    return 0
def unlog16ops(opcode, ops):
    """Classify a 16-bit-only unary logical op: 1 when both operands pass
    rcregs3, else 0."""
    both_fit = rcregs3(ops[0]) and rcregs3(ops[1])
    return 1 if both_fit else 0
360
# binary floating-point ops
# 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
# FRT is implicitly FRB in 10-bit mode.
def binfp1016ops(opcode, ops):
    """Classify a binary FP op with 10- and 16-bit forms.

    Returns 0 unless all three operands pass rcfregs3 and the second is
    not register 0; otherwise 3 when the first and third operands have
    the same number, else 1.
    """
    if not (rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2])
            and regno(ops[1]) != 0):
        return 0
    # Value comparison instead of identity.
    if regno(ops[0]) == regno(ops[2]):
        return 3
    return 1
def unfp1016ops(opcode, ops):
    """Classify a unary FP op with 10- and 16-bit forms.

    Returns 0 unless both operands pass rcfregs3; otherwise 3 when they
    have the same register number, else 1.
    """
    if not (rcfregs3(ops[0]) and rcfregs3(ops[1])):
        return 0
    # Value comparison instead of identity.
    if regno(ops[0]) == regno(ops[1]):
        return 3
    return 1
def binfp16ops(opcode, ops):
    """Classify a 16-bit-only binary FP op.

    Returns 1 when all three operands pass rcfregs3 and the second is
    not register 0; 0 otherwise.
    """
    # '!= 0' rather than 'is not 0': compare the number, not the object.
    if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
       and regno(ops[1]) != 0:
        return 1
    return 0
def unfp16ops(opcode, ops):
    """Classify a 16-bit-only unary FP op: 1 when both operands pass
    rcfregs3, else 0."""
    both_fit = rcfregs3(ops[0]) and rcfregs3(ops[1])
    return 1 if both_fit else 0
386
def cnvfp16ops(opcode, ops):
    """Classify an FP conversion op: 1 when both operands pass rcfregs2,
    else 0."""
    both_fit = rcfregs2(ops[0]) and rcfregs2(ops[1])
    return 1 if both_fit else 0
391
# Map opcodes that might be compressed to a function that returns the
# best potential encoding kind for the insn, per the numeric coding
# below.  Each function is called as f(opcode, ops), with ops being the
# list of mapped operands.
copcond = {
    'ori': maybenop, 'addi': maybenop,
    # 'attn': binutils won't ever print this
    'b': uncondbranch, 'bl': uncondbranch,
    # 'bc', 'bcl': only in 16-imm mode, not implemented yet
    # 'bclr', 'bclrl': available in 10- or 16-bit, not implemented yet
    # 16-imm opcodes not implemented yet.
    'stdx': rstorex, 'stwx': rstorex,
    'std': rstorex, 'stw': rstorex,  # only offset zero
    'stfdx': frstorex, 'stfsx': frstorex,
    'stfd': frstorex, 'stfs': frstorex,  # only offset zero
    # Assuming lwz* rather than lwa*.
    'ldx': rloadx, 'lwzx': rloadx,
    'ld': rloadx, 'lwz': rloadx,  # only offset zero
    # FP loads target an fr register, which rloadx can never accept, so
    # they go through frloadx, mirroring the FP stores above.
    'lfdx': frloadx, 'lfsx': frloadx,
    'lfd': frloadx, 'lfs': frloadx,  # only offset zero
    'add': addop,
    'subf.': subfop, 'neg.': negop,
    # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
    # cmpw is cmp with L=0, 16-bit only.
    'cmp': cmpop,
    'sld.': sldop, 'srd.': srdop, 'srad.': sradop,
    'and': binlog1016ops, 'nand': binlog1016ops,
    'or': binlog1016ops, 'nor': binlog1016ops,
    # assuming popcnt and cntlz mean the *d opcodes.
    'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops,
    # not RT, RB is mapped to nand/nor RT, RB, RB.
    'xor': binlog16ops, 'eqv': binlog16ops,
    # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
    # assuming cnttz mean the *d opcode.
    'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops,
    'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops,
    'fneg.': unfp1016ops,
    'fdiv': binfp16ops,
    'fabs.': unfp16ops, 'fmr.': unfp16ops,
    # ??? are these the intended fp2int and int2fp, for all
    # combinations of signed/unsigned float/double?
    'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops,
    'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops,
    'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops,
    'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops,
    # Not implemented yet:
    # 10- and 16-bit:
    # 'mcrf':
    # 'cbank':
    # 16-bit only:
    # 'crnor':
    # 'crandc':
    # 'crxor':
    # 'crnand':
    # 'crand':
    # 'creqv':
    # 'crorc':
    # 'cror':
    # 'mtlr':
    # 'mtctr':
    # 'mflr':
    # 'mfctr':
    # 'mtcr':
    # 'mfcr':
}
456
457 # We have 4 kinds of insns:
458
459 # 0: uncompressed; leave input insn unchanged
460 # 1: 16-bit compressed, only in compressed mode
461 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
462 # 3: 10-bit compressed, may switch to compressed mode
463
# count[0..3] count the occurrences of the base kinds.
# count[4] counts extra 10-bit nop-switches to compressed mode,
# tentatively introduced before insns that can be 16-bit encoded.
# count[5] counts extra 10-bit nop-switches to compressed mode,
# tentatively introduced before insns that can be 16-imm encoded.
# count[6] counts extra 16-bit nop-switches back to uncompressed,
# introduced after a 16-imm insn.
count = [0] * 7
# Default comments for the insn kinds above, indexed by kind.
comments = [
    '',
    '\t; 16-bit',
    '\t; 16-imm',
    '\t; 10-bit',
]
474
# curi stands for the insn kind that we read and processed in the
# previous iteration of the loop, and previ is the one before it.  the
# one we're processing in the current iteration will be stored in
# nexti until we make it curi at the very end of the loop.
previ = curi = 0

# Insn kinds are plain ints, so every test below compares by value;
# identity tests against int literals only work by accident of caching.
for line in sys.stdin:
    if line.endswith('\n'):
        line = line[:-1]

    match = insn.fullmatch(line)
    if match is None:
        print(line)
        # Switch to uncompressed mode at function boundaries
        previ = curi = 0
        continue

    # addr is read by mapop when it maps branch targets.
    addr = match['addr']
    opcode = match['opcode']
    operands = match['operands']

    if opcode in copcond:
        nexti = copcond[opcode](opcode,
                                [mapop(op) for op in operands.split(',')])
    else:
        nexti = 0

    comment = None

    if curi == 0:
        if nexti == 0:
            pass  # Uncompressed mode for good.
        elif nexti == 1:
            # If curi was not a single uncompressed mode insn,
            # tentatively encode a 10-bit nop to enter compressed
            # mode, and then 16-bit.  It takes as much space as
            # encoding as 32-bit, but offers more possibilities for
            # subsequent compressed encodings.  A compressor proper
            # would have to go back and change the encoding
            # afterwards, but we're just counting.
            if previ != 1:
                print('\t\th.nop\t\t; 10-bit (tentative)')
                count[4] += 1
                comment = '16-bit (tentative)'
            else:
                comment = '16-bit auto-back'
        elif nexti == 2:
            # We can use compressed encoding for the 16-imm nexti
            # after an uncompressed insn without penalty if it's the
            # single-insn uncompressed mode slot.  For other
            # configurations, we can either remain in uncompressed
            # mode, or switch to compressed mode with a 10-bit nop.
            if previ != 1:
                print('\t\th.nop\t\t; 10-bit (tentative)')
                count[5] += 1
                comment = '16-imm (tentative), vs uncompressed'
            else:
                comment = '16-imm auto-back'
        elif nexti == 3:
            # If previ was 16-bit compressed, curi would be in the
            # single-insn uncompressed slot, so nexti could be encoded
            # as 16-bit, enabling another 1-insn uncompressed slot
            # after nexti that a 10-bit insn wouldn't, so make it so.
            if previ == 1:
                nexti = 1
                comment = '16-bit auto-back, vs 10-bit'
    elif curi == 1:
        # After a 16-bit insn, anything goes.  If it remains in 16-bit
        # mode, we can have 1 or 2 as nexti; if it returns to 32-bit
        # mode, we can have 0 or 3.  Using 1 instead of 3 makes room
        # for a subsequent single-insn compressed mode, so prefer
        # that.
        if nexti == 3:
            nexti = 1
            comment = '16-bit, vs 10-bit'
    elif curi == 2:
        # After a 16-imm insn, we can only switch back to uncompressed
        # mode with a 16-bit nop.
        if nexti == 0:
            print('\t\tc.nop\t\t; forced switch back to uncompressed mode')
            count[6] += 1
            previ = curi
            curi = 1
        elif nexti == 3:
            nexti = 1
    elif curi == 3:
        # After a 10-bit insn, another insn that could be encoded as
        # 10-bit might as well be encoded as 16-bit, to make room for
        # a single-insn uncompressed insn afterwards.
        if nexti == 3:
            nexti = 1
            comment = '16-bit, vs 10-bit'
    else:
        # A bare string cannot be raised in Python 3.
        raise ValueError("unknown mode for previious insn")

    count[nexti] += 1

    if comment is None:
        comment = comments[nexti]
    else:
        comment = '\t; ' + comment

    print(line + comment)

    previ = curi
    curi = nexti
581
# Size accounting: mode-switching nops and compressed insns are counted
# at 2 bytes each, the other kinds at 4 bytes each.  Every compressed
# insn stands for one that originally took 4 bytes.
transition_bytes = 2 * (count[4] + count[5] + count[6])
compressed_bytes = 2 * (count[1] + count[3])
uncompressed_bytes = 4 * (count[0] + count[2])
total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
original_bytes = 2 * compressed_bytes + uncompressed_bytes

print()
print('Summary')
print('32-bit uncompressed instructions: %i' % count[0])
print('16-bit compressed instructions: %i' % count[1])
print('16-imm compressed-mode instructions: %i' % count[2])
print('10-bit compressed instructions: %i' % count[3])
print('10-bit mode-switching nops: %i' % count[4])
print('10-bit mode-switching nops for imm-16: %i' % count[5])
print('16-bit mode-switching nops after imm-16: %i' % count[6])
print('Compressed size estimate: %i' % total_bytes)
print('Original size: %i' % original_bytes)
if original_bytes:
    print('Compressed/original ratio: %f' % (total_bytes / original_bytes))
else:
    # No insns were recognized in the input; avoid dividing by zero.
    print('Compressed/original ratio: n/a (no instructions found)')