comp16-v1-skel: completed implementation of all opcodes
[libreriscv.git] / openpower / sv / comp16-v1-skel.py
1 #! /bin/env python3
2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
3
4 # Estimate ppc code compression with Libre-SOC encoding attempt v1.
5
6
7 # Copyright 2020 Alexandre Oliva
8
9 # This script is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3, or (at your option)
12 # any later version.
13
14 # This script is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
18
19 # You should have received a copy of the GNU General Public License
20 # along with this script; see the file COPYING3. If not see
21 # <http://www.gnu.org/licenses/>.
22
23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
24
25
26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
27
28 # It will look for insns that can be represented in compressed mode,
29 # according to the encoding rules in the copcond dictionary below.
30
31 # Nothing is assumed as to the actual bit-encoding of the insns, this
32 # is just to experiment with insn selection and get a quick feedback
33 # loop for the encoding options in compressed mode.
34
35 # In this script, the computations of encoding modes and transitions
36 # are those for attempt 1 encoding, that encompasses:
37
38 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
39 # mode or return to 32-bit mode;
40
41 # - 16-bit insns in compressed mode, each with 2 bits devoted to
42 # encoding one of the following possibilities:
43
44 # -- switch back to uncompressed mode at the next insn
45
46 # -- interpret the next insn in uncompressed mode, then return to
47 # compressed mode
48
49 # -- remain in 16-bit mode for the next insn
50
51 # - a 16-bit immediate insn in compressed mode, that must be followed
52 # by another compressed insn
53
54 # At (visible) entry points, mode is forced to return to uncompressed
55 # mode. Every branch target must be in uncompressed mode as well, but
56 # this script does not enforce that.
57
58 # The entire code stream is printed, without any attempt to modify the
59 # addresses that go along with or in them; we only insert markers for
60 # the transition points, and for the compressed instructions.
61
62 # The really useful information is printed at the end: a summary of
63 # transition and compressed-insn counts, and the achieved compression
64 # rate.
65
66 import sys
67 import re
68
# One disassembled insn line: leading whitespace, a hex address followed
# by a colon, the opcode, and whatever operands text follows it.
# Raw strings keep the regex escapes (\s, \(, \*, \+) from being read as
# invalid Python string escapes.
insn = re.compile(
    r'\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')

# reg is a regkind (r, cr, fr) followed by a regnum
xreg = r'(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'

# immediate is a sequence of digits, possibly preceded by a negative sign
ximm = r'(?P<immediate>-?[0-9]+)'

# branch is a branch target address; ignore an angle-bracketed label after it
xbrt = r'(?P<branch>[0-9a-f]+)(?: <.*>)?'

# offset is like immediate, but followed by a parenthesized basereg
xoff = r'(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'

# creg is the cr, cond names one of its bits
crbit = r'(?:4\*(?P<creg>cr[0-7])\+)?(?P<cond>gt|lt|eq|so)'

# Combine the above into alternatives, to easily classify operands by
# pattern matching.  Earlier alternatives win when several could match.
opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff, crbit]))
89
90 # Pre-parse and classify op into a mop, short for mapped op.
def mapop(op):
    """Classify the operand string op and return its mapped op (mop).

    The result is a tuple whose first element names the operand class
    ('other', a register kind, 'imm', 'pcoff', 'ofst' or 'crbit').
    Branch targets are measured against the module-level addr of the
    insn being processed, so addr must be set before calling.

    Raises ValueError if the operand matched but none of the known
    groups is set.
    """
    match = opkind.fullmatch(op)

    if match is None:
        return ('other', op)
    if match['reg'] is not None:
        return (match['regkind'], int(match['regnum']), op)
    if match['immediate'] is not None:
        # Keep the number of bits the value needs, not the value itself.
        return ('imm', int(op).bit_length(), op)
    if match['branch'] is not None:
        displacement = int(match['branch'], 16) - int(addr, 16)
        return ('pcoff', displacement.bit_length(), op, addr)
    if match['offset'] is not None:
        return ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
    if match['cond'] is not None:
        # A condition bit with no explicit register belongs to cr0.
        creg = 'cr0' if match['creg'] is None else match['creg']
        return ('crbit', mapop(creg), ('cond', match['cond']), op)
    # A string cannot be raised in Python 3; use a real exception.
    raise ValueError("unrecognized operand kind")
115
116 # Accessor to enable the mapop representation to change easily.
def opclass(mop):
    """Return the operand class, stored as the first element of mop."""
    kind = mop[0]
    return kind
119
120 # Some opclass predicates, for the same reason.
def regp(mop):
    """Return True iff mop is a register of any kind, or a zero immediate.

    A zero immediate (bit length 0) is accepted so that it can stand in
    for register 0.
    """
    # Compare by value: identity tests against literals only work by
    # accident of interpreter caching.
    return opclass(mop) in {'r', 'fr', 'cr'} \
        or (opclass(mop) == 'imm' and mop[1] == 0)
def immp(mop):
    """Return True iff mop is an immediate or a pc-relative branch offset."""
    immediate_kinds = {'imm', 'pcoff'}
    kind = opclass(mop)
    return kind in immediate_kinds
def rofp(mop):
    """Return True iff mop is a register+offset operand."""
    # Value comparison; identity against a str literal is unreliable.
    return opclass(mop) == 'ofst'
def crbt(mop):
    """Return True iff mop is a condition-register bit operand."""
    # Value comparison; identity against a str literal is unreliable.
    return opclass(mop) == 'crbit'
130
131 # Some more accessors.
132
133 # Return the reg number if mop fits regp.
def regno(mop):
    """Return the register number of mop.

    Immediate-like operands whose bit length is 0 are accepted too and
    yield 0.

    Raises TypeError for any other operand.
    """
    if regp(mop) or (immp(mop) and mop[1] == 0):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not a register")
139
def immval(mop):
    """Return the integer value parsed from the operand text of mop.

    Raises TypeError if mop does not satisfy immp.
    """
    if immp(mop):
        return int(mop[2])
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an immediate")
144
145 # Return the immediate length if mop fits immp.
def immbits(mop):
    """Return the bit length recorded in mop for an immediate.

    Raises TypeError if mop does not satisfy immp.
    """
    if immp(mop):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an immediate")
150
151 # Return the register sub-mop if mop fits rofp.
def rofreg(mop):
    """Return the base-register sub-mop of a register+offset operand.

    Raises TypeError if mop does not satisfy rofp.
    """
    if rofp(mop):
        return mop[2]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an offset")
156
# Return the offset sub-mop if mop fits rofp.
def rofset(mop):
    """Return the offset sub-mop of a register+offset operand.

    Raises TypeError if mop does not satisfy rofp.
    """
    if rofp(mop):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an offset")
162
163 # Return the register sub-mop if mop fits crbit.
def crbtreg(mop):
    """Return the condition-register sub-mop of a crbit operand.

    Raises TypeError if mop does not satisfy crbt.
    """
    if crbt(mop):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not a condition register bit")
168
169 # Return the cond bit name if mop fits crbit.
def crbtcnd(mop):
    """Return the ('cond', name) element of a crbit operand.

    Raises TypeError if mop does not satisfy crbt.
    """
    if crbt(mop):
        return mop[2]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not a condition register bit")
174
175 # Following are predicates to be used in copcond, to tell the mode in
176 # which opcode with ops as operands is to be represented.
177
178 # TODO: use insn_histogram.py to show the best targets
179 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
180 # Registers representable in a made-up 3-bit mapping.
181 # It must contain 0 for proper working of at least storex.
cregs3 = {0, 1, 2, 3, 4, 5, 6, 7, 31}
# NOTE(review): nine members, although 3 bits select only eight -- confirm.
# The 2-bit set need not include 0, but it has to remain a subset of
# cregs3 for proper working of at least storex.
cregs2 = {2, 3, 4, 5}
# Floating-point registers share the general-purpose sets for now.
cfregs3 = cregs3
cfregs2 = cregs2
# Condition registers accepted by rccregs2.
ccregs2 = {0, 1, 2, 3}
190
191 # Return true iff mop is a regular register present in cregs2
def rcregs2(mop):
    """Tell whether mop is an 'r' or 'imm' operand whose regno is in cregs2."""
    if opclass(mop) not in {'r', 'imm'}:
        return False
    return regno(mop) in cregs2
194
195 # Return true iff mop is a regular register present in cregs3
def rcregs3(mop):
    """Tell whether mop is an 'r' or 'imm' operand whose regno is in cregs3."""
    if opclass(mop) not in {'r', 'imm'}:
        return False
    return regno(mop) in cregs3
198
199 # Return true iff mop is a floating-point register present in cfregs2
def rcfregs2(mop):
    """Return True iff mop is an 'fr' register whose number is in cfregs2."""
    # Value comparison; identity against a str literal is unreliable.
    return opclass(mop) == 'fr' and regno(mop) in cfregs2
202
203 # Return true iff mop is a floating-point register present in cfregs3
def rcfregs3(mop):
    """Return True iff mop is an 'fr' register whose number is in cfregs3."""
    # Value comparison; identity against a str literal is unreliable.
    return opclass(mop) == 'fr' and regno(mop) in cfregs3
206
207 # Return true iff mop is a condition register present in ccregs2
def rccregs2(mop):
    """Return True iff mop is a 'cr' register whose number is in ccregs2."""
    # Value comparison; identity against a str literal is unreliable.
    return opclass(mop) == 'cr' and regno(mop) in ccregs2
210
211 # Return true iff mop is an immediate of at most 8 bits.
def imm8(mop):
    """Tell whether mop is an immediate needing at most 8 bits."""
    if not immp(mop):
        return False
    return immbits(mop) <= 8
214
215 # Return true iff mop is an immediate of at most 12 bits.
def imm12(mop):
    """Tell whether mop is an immediate needing at most 12 bits."""
    if not immp(mop):
        return False
    return immbits(mop) <= 12
218
219 # Compress binary opcodes iff the first two operands (output and first
220 # input operand) are registers representable in 3 bits in compressed
221 # mode, and the immediate operand can be represented in 8 bits.
def bin2regs3imm8(opcode, ops):
    """Return 1 (16-bit) when both registers fit cregs3 and the
    immediate fits 8 bits, else 0 (uncompressed)."""
    dest, source, immediate = ops[0], ops[1], ops[2]
    fits = rcregs3(dest) and rcregs3(source) and imm8(immediate)
    return 1 if fits else 0
226
227 # Recognize do-nothing insns, particularly ori r0,r0,0.
def maybenop(opcode, ops):
    """Return 3 (10-bit) for a do-nothing 'ori'/'addi' on r0, else 0.

    The insn qualifies when both register operands are number 0, the
    first is a general-purpose register, and the immediate is zero.
    """
    # Integers and strings are compared by value, not identity.
    if opcode in ['ori', 'addi'] and regno(ops[0]) == regno(ops[1]) \
       and opclass(ops[0]) == 'r' and regno(ops[0]) == 0 \
       and imm8(ops[2]) and immbits(ops[2]) == 0:
        return 3
    return 0
234
235 # Recognize an unconditional branch, that can be represented with a
# 6-bit operand in 10-bit mode, and an additional 4 bits in 16-bit
237 # mode. In both cases, the offset is shifted left by 2 bits.
def uncondbranch(opcode, ops):
    """Pick the encoding of an unconditional branch from its target size.

    Returns 3 (10-bit) when the target fits imm8, 1 (16-bit) when it
    only fits imm12, and 0 (uncompressed) otherwise.
    """
    target = ops[0]
    if not imm12(target):
        return 0
    return 3 if imm8(target) else 1
244
245 # 2 bits for RT and RA. RB is r0 in 10-bit, and 3 bits in 16-bit ???
246 # there's a general assumption that, if an insn can be represented in
247 # 10-bits, then it can also be represented in 16 bits. This will not
248 # be the case if cregs3 can't represent register 0. For
249 # register+offset addresses, we support 16-imm stdi, fstdi, with 3-bit
250 # immediates left-shifted by 3; stwi, fstsi, with 2-bit immediates
251 # left-shifted by 2; stdspi for 6-bit immediate left-shifted by 3
252 # biased by -256, and stwspi for 6-bit immediate left-shifted by 2
253 # also biased by -256. fstdi and fstsi store in memory a
254 # floating-point register, the others do a general-purpose register.
def storexaddr(opcode, ops):
    """Return the best encoding kind for the address part of a store.

    ops[0] is the stored register; ops[1] is either a register+offset
    operand or the first of two address registers.  Returns 2 (16-imm),
    3 (10-bit), 1 (16-bit) or 0 (uncompressed).
    """
    # Canonicalize offset in ops[1] to reg, imm
    if rofp(ops[1]):
        ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
        # The last opcode letter selects the access-size shift.
        shift = memshifts[opcode[-1]]
        # The offset must be a multiple of the access size.
        if immval(ops[2]) & ((1 << shift) - 1) != 0:
            return 0
        if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
            return 2
        # r1-based form for non-FP registers, offset biased by -256.
        if regno(ops[1]) == 1 and opclass(ops[0]) != 'fr' \
           and (immval(ops[2]) - 256).bit_length() <= shift + 6:
            return 2
    # Require offset 0 for compression of non-indexed form.
    if not regp(ops[2]):
        return 0
    # If any of the registers is zero, and the other fits in cregs2,
    # it fits in 10-bit.
    if (rcregs2(ops[1]) and regno(ops[2]) == 0) \
       or (regno(ops[1]) == 0 and rcregs2(ops[2])):
        return 3
    # For 16-bit one must fit rcregs2 and the other rcregs3.
    if (rcregs2(ops[1]) and rcregs3(ops[2])) \
       or (rcregs3(ops[1]) and rcregs2(ops[2])):
        return 1
    return 0
def rstorex(opcode, ops):
    """Classify a general-purpose store: the stored register must fit
    cregs2, then the address decides; otherwise 0 (uncompressed)."""
    stored = ops[0]
    return storexaddr(opcode, ops) if rcregs2(stored) else 0
def frstorex(opcode, ops):
    """Classify a floating-point store: the stored register must fit
    cfregs2, then the address decides; otherwise 0 (uncompressed)."""
    stored = ops[0]
    return storexaddr(opcode, ops) if rcfregs2(stored) else 0
288
# Shift amount for memory offsets, keyed by the last letter of the opcode.
memshifts = dict(d=3, w=2, z=2, s=2)
290
291 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit,
292 # RB and RT must match. ??? It's not clear what that means WRT
293 # register mapping of different kinds of registers, e.g. when RT is a
294 # floating-point register..
295 # For register+offset addresses, we support 16-imm ldi, fldi, with
296 # 3-bit immediates left-shifted by 3; lwi, flsi, with 2-bit immediates
297 # left-shifted by 2; ldspi for 6-bit immediate left-shifted by 3
298 # biased by -256, and lwspi for 6-bit immediate left-shifted by 2 also
299 # biased by -256. fldi and flsi load to floating-point registers, the
300 # others load to general-purpose registers.
def loadxaddr(opcode, ops):
    """Return the best encoding kind for the address part of a load.

    ops[0] is the loaded register; ops[1] is either a register+offset
    operand or the first of two address registers.  Returns 2 (16-imm),
    3 (10-bit), 1 (16-bit) or 0 (uncompressed).
    """
    # Canonicalize offset in ops[1] to reg, imm
    if rofp(ops[1]):
        ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
        # The last opcode letter selects the access-size shift.
        shift = memshifts[opcode[-1]]
        # The offset must be a multiple of the access size.
        if immval(ops[2]) & ((1 << shift) - 1) != 0:
            return 0
        if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
            return 2
        # r1-based form for non-FP registers, offset biased by -256.
        if regno(ops[1]) == 1 and opclass(ops[0]) != 'fr' \
           and (immval(ops[2]) - 256).bit_length() <= shift + 6:
            return 2
    # Otherwise require offset 0 for compression of non-indexed form.
    if not regp(ops[2]):
        return 0
    if rcregs3(ops[1]) and rcregs3(ops[2]):
        # 10-bit needs the destination to coincide with an address register.
        if regno(ops[0]) in {regno(ops[1]), regno(ops[2])}:
            return 3
        return 1
    return 0
def rloadx(opcode, ops):
    """Classify a general-purpose load: the destination must fit
    cregs3, then the address decides; otherwise 0 (uncompressed)."""
    dest = ops[0]
    return loadxaddr(opcode, ops) if rcregs3(dest) else 0
def frloadx(opcode, ops):
    """Classify a floating-point load: the destination must fit
    cfregs3, then the address decides; otherwise 0 (uncompressed)."""
    dest = ops[0]
    return loadxaddr(opcode, ops) if rcfregs3(dest) else 0
328
329 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit,
330 # RB and RT must match. RA must not be zero, but in 16-bit mode we
331 # can swap RA and RB to make it fit.
def addop(opcode, ops):
    """Return the encoding kind for 'add'.

    All three registers must fit cregs3.  Returns 3 (10-bit) when the
    destination equals one of the sources, 1 (16-bit) when at least
    one source is nonzero, and 0 (uncompressed) otherwise.
    """
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
        if regno(ops[0]) in {regno(ops[1]), regno(ops[2])}:
            return 3
        # Compare register numbers by value, not identity.
        if regno(ops[1]) != 0 or regno(ops[2]) != 0:
            return 1
    return 0
339
340 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit,
341 # RA and RT must match. ??? The spec says RB, but the actual opcode
342 # is subf., subtract from, and it subtracts RA from RB. 'neg.' would
343 # make no sense as described there if we didn't use RA.
def subfop(opcode, ops):
    """Return the encoding kind for 'subf.'.

    All three registers must fit cregs3.  Returns 3 (10-bit) when the
    first two operands are the same register, 1 (16-bit) otherwise,
    and 0 (uncompressed) when a register does not fit.
    """
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
        # Compare register numbers by value, not identity.
        if regno(ops[0]) == regno(ops[1]):
            return 3
        return 1
    return 0
def negop(opcode, ops):
    """Return 3 (10-bit) when both registers fit cregs3, else 0."""
    both_fit = rcregs3(ops[0]) and rcregs3(ops[1])
    return 3 if both_fit else 0
354
355 # 3 bits for RA and 3 bits for RB. L (op1) must be 1 for 10-bit.
356 # op0 is a cr, must be zero for 10-bit.
def cmpop(opcode, ops):
    """Return the encoding kind for 'cmp'.

    ops[2] and ops[3] must fit cregs3.  Returns 3 (10-bit) when ops[0]
    is register number 0 and ops[1] has value 1, 1 (16-bit) otherwise,
    and 0 (uncompressed) when a register does not fit.
    """
    if rcregs3(ops[2]) and rcregs3(ops[3]):
        # Compare integers by value, not identity.
        if regno(ops[0]) == 0 and immval(ops[1]) == 1:
            return 3
        return 1
    return 0
363
364 # 3 bits for RS, 3 bits for RB, 3 bits for RS, 16-bit only.
def sldop(opcode, ops):
    """Return 1 (16-bit) when all three registers fit cregs3, else 0."""
    all_fit = rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2])
    return 1 if all_fit else 0
369 # same as sld, except RS must be nonzero.
def srdop(opcode, ops):
    """Classify like sldop, but only when ops[1] is not register 0;
    otherwise return 0 (uncompressed)."""
    # Compare the register number by value, not identity.
    if regno(ops[1]) != 0:
        return sldop(opcode, ops)
    return 0
374 # same as sld, except RS is given by RA, so they must be the same.
def sradop(opcode, ops):
    """Classify like sldop, but only when the first two operands are
    the same register; otherwise return 0 (uncompressed)."""
    # Compare register numbers by value, not identity.
    if regno(ops[0]) == regno(ops[1]):
        return sldop(opcode, ops)
    return 0
379
380 # binary logical ops: and, nand, or, nor.
381 # 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
382 # RT is implicitly RB in 10-bit mode.
def binlog1016ops(opcode, ops):
    """Return the encoding kind for the binary logical ops.

    All three registers must fit cregs3 and ops[1] must not be
    register 0.  Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
    """
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
       and regno(ops[1]) != 0:
        # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
        # of the 16-bit nor; we've already ruled out r0 as RB above.
        # The opcode is compared by value: an identity test against the
        # literal never matched, so 'nor' was not being excluded here.
        if regno(ops[0]) == regno(ops[2]) and opcode != 'nor':
            return 3
        # or and and, with two identical inputs, stand for mr.
        # nor and nand, likewise, stand for not, that has its
        # own unary 10-bit encoding.
        if regno(ops[1]) == regno(ops[2]):
            return 3
        return 1
    return 0
397 # 3 bits for RB, 3 bits for RT in 16-bit mode.
398 # RT is implicitly RB in 10-bit mode.
def unlog1016ops(opcode, ops):
    """Return the encoding kind for unary logical ops with a 10-bit form.

    Both registers must fit cregs3.  Returns 3 (10-bit) when they are
    the same register, 1 (16-bit) otherwise, 0 when one does not fit.
    """
    if rcregs3(ops[0]) and rcregs3(ops[1]):
        # Compare register numbers by value, not identity.
        if regno(ops[0]) == regno(ops[1]):
            return 3
        return 1
    return 0
405 # 16-bit only logical ops; no 10-bit encoding available
406 # same constraints as the 1016 ones above.
def binlog16ops(opcode, ops):
    """Return 1 (16-bit) when all three registers fit cregs3 and
    ops[1] is not register 0, else 0 (uncompressed)."""
    # Compare the register number by value, not identity.
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
       and regno(ops[1]) != 0:
        return 1
    return 0
def unlog16ops(opcode, ops):
    """Return 1 (16-bit) when both registers fit cregs3, else 0."""
    both_fit = rcregs3(ops[0]) and rcregs3(ops[1])
    return 1 if both_fit else 0
416
417 # binary floating-point ops
418 # 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
419 # FRT is implicitly FRB in 10-bit mode.
def binfp1016ops(opcode, ops):
    """Return the encoding kind for binary FP ops with a 10-bit form.

    All three registers must fit cfregs3 and ops[1] must not be
    register 0.  Returns 3 (10-bit) when ops[0] and ops[2] are the
    same register, 1 (16-bit) otherwise, 0 when the checks fail.
    """
    # Compare register numbers by value, not identity.
    if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
       and regno(ops[1]) != 0:
        if regno(ops[0]) == regno(ops[2]):
            return 3
        return 1
    return 0
def unfp1016ops(opcode, ops):
    """Return the encoding kind for unary FP ops with a 10-bit form.

    Both registers must fit cfregs3.  Returns 3 (10-bit) when they are
    the same register, 1 (16-bit) otherwise, 0 when one does not fit.
    """
    if rcfregs3(ops[0]) and rcfregs3(ops[1]):
        # Compare register numbers by value, not identity.
        if regno(ops[0]) == regno(ops[1]):
            return 3
        return 1
    return 0
def binfp16ops(opcode, ops):
    """Return 1 (16-bit) when all three registers fit cfregs3 and
    ops[1] is not register 0, else 0 (uncompressed)."""
    # Compare the register number by value, not identity.
    if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
       and regno(ops[1]) != 0:
        return 1
    return 0
def unfp16ops(opcode, ops):
    """Return 1 (16-bit) when both registers fit cfregs3, else 0."""
    both_fit = rcfregs3(ops[0]) and rcfregs3(ops[1])
    return 1 if both_fit else 0
442
def cnvfp16ops(opcode, ops):
    """Return 1 (16-bit) when both registers fit cfregs2, else 0."""
    both_fit = rcfregs2(ops[0]) and rcfregs2(ops[1])
    return 1 if both_fit else 0
447
448 # Move between CRs. 3 bits for destination, 3 bits for source in
449 # 16-bit mode. That covers all possibilities. For 10-bit mode, only
450 # 2 bits for destination.
def mcrfop(opcode, ops):
    """Return 3 (10-bit) when the destination CR is in ccregs2,
    and 1 (16-bit) in every other case."""
    destination = ops[0]
    return 3 if rccregs2(destination) else 1
455 # Logical ops between two CRs into one. 2 bits for destination, that
456 # must coincide with one of the inputs, 3 bits for the other input.
457 # 16-bit only.
def crops(opcode, ops):
    """Return 1 (16-bit) when ops[0] is a CR in ccregs2 with the same
    register number as ops[1], else 0 (uncompressed)."""
    # Compare register numbers by value, not identity.
    if rccregs2(ops[0]) and regno(ops[0]) == regno(ops[1]):
        return 1
    return 0
462
463 # 3 bits for general-purpose register; immediate identifies the
464 # special purpose register to move to: 8 for lr, 9 for ctr. 16-bit
465 # only. mtspr imm,rN moves from rN to the spr; mfspr rN,imm moves
466 # from spr to rN.
def mtsprops(opcode, ops):
    """Return 1 (16-bit) for a move to spr 8 or 9 from a register that
    fits cregs3, else 0 (uncompressed)."""
    spr, source = ops[0], ops[1]
    if immval(spr) not in (8, 9):
        return 0
    return 1 if rcregs3(source) else 0
def mfsprops(opcode, ops):
    """Return 1 (16-bit) for a move from spr 8 or 9 to a register that
    fits cregs3, else 0 (uncompressed)."""
    dest, spr = ops[0], ops[1]
    if immval(spr) not in (8, 9):
        return 0
    return 1 if rcregs3(dest) else 0
475
476 # 3 bits for nonzero general-purpose register; the immediate is a
477 # per-CR mask (8-bits). mtcr rN is mtcrf 0xFF, rN. mfcr rN is a raw
478 # opcode, not an alias.
def mtcrfops(opcode, ops):
    """Return 1 (16-bit) when the mask is 255 and the source register
    fits cregs3 and is not register 0, else 0 (uncompressed)."""
    # Compare integers by value: 'is 255' depends on interpreter caching.
    if immval(ops[0]) == 255 and rcregs3(ops[1]) and regno(ops[1]) != 0:
        return 1
    return 0
def mfcrops(opcode, ops):
    """Return 1 (16-bit) when the destination fits cregs3 and is not
    register 0, else 0 (uncompressed)."""
    # Compare the register number by value, not identity.
    if rcregs3(ops[0]) and regno(ops[0]) != 0:
        return 1
    return 0
487
488 # 3 bits for destination and source register, must be the same. Full
489 # shift range fits. 16-imm format.
def shiftops(opcode, ops):
    """Return 2 (16-imm) when ops[0] fits cregs3 and is the same
    register as ops[1], else 0 (uncompressed)."""
    # Compare register numbers by value, not identity.
    if rcregs3(ops[0]) and regno(ops[0]) == regno(ops[1]):
        return 2
    return 0
494
495 # For 16-imm 'addis' and 'addi', we have 3 bits (nonzero) for the
496 # destination register, source register is implied 0, the immediate
497 # must either fit in signed 5-bit, left-shifted by 3, or in signed
498 # 7-bit without shift. ??? That seems backwards.
def addiops(opcode, ops):
    """Return the encoding kind for 'addi'.

    Returns 2 (16-imm) when the destination fits cregs3 and is not
    register 0, the source is register 0, and the immediate fits 8
    bits and is either a multiple of 8 or fits 7 bits.  Any other insn
    is handed to maybenop.
    """
    # imm8 already bounds the immediate to 8 bits, so no separate
    # immbits <= 8 test is needed.  Integers are compared by value.
    if rcregs3(ops[0]) and regno(ops[0]) != 0 \
       and regno(ops[1]) == 0 and imm8(ops[2]) \
       and ((immval(ops[2]) & 7) == 0 or immbits(ops[2]) <= 7):
        return 2
    return maybenop(opcode, ops)
506
507 # cmpdi and cmpwi are aliases to uncompressed cmp CR#, L, RA, imm16,
508 # CR# being the target condition register, L being set for d rather
509 # than w. In 16-imm, CR# must be zero, RA must fit in 3 bits, and the
510 # immediate must be 6 bits signed.
def cmpiops(opcode, ops):
    """Return 2 (16-imm) when ops[0] is register number 0, ops[1] is 0
    or 1, ops[2] fits cregs3 and ops[3] needs at most 6 bits; else 0."""
    # Compare the register number by value, not identity.
    if regno(ops[0]) == 0 and immval(ops[1]) in (0, 1) \
       and rcregs3(ops[2]) and immbits(ops[3]) <= 6:
        return 2
    return 0
516
517 # 16-imm bc, with or without LK, uses 3 bits for BI (CR0 and CR1 only),
518 # and 1 bit for BO1 (to tell BO 12 from negated 4).
def bcops(opcode, ops):
    """Return 2 (16-imm) for a conditional branch whose first operand
    is 4 or 12, whose condition bit lives in cr0 or cr1, and whose
    target needs at most 8 bits; else 0 (uncompressed)."""
    bo, bi, target = ops[0], ops[1], ops[2]
    if immval(bo) not in (4, 12):
        return 0
    if regno(crbtreg(bi)) > 1:
        return 0
    return 2 if immbits(target) <= 8 else 0
524
525 # 2 bits for BI and 3 bits for BO in 10-bit encoding; one extra bit
526 # for each in 16-bit.
def bclrops(opcode, ops):
    """Return the encoding kind for 'bclr'/'bclrl'.

    Requires ops[0] <= 15, a condition bit in cr0 or cr1, and a zero
    ops[2].  Returns 3 (10-bit) when additionally ops[0] <= 7 and the
    bit is in cr0, 1 (16-bit) otherwise, 0 when the checks fail.
    """
    # Compare integers by value, not identity.
    if immval(ops[0]) <= 15 and regno(crbtreg(ops[1])) <= 1 \
       and immbits(ops[2]) == 0:
        if immval(ops[0]) <= 7 and regno(crbtreg(ops[1])) == 0:
            return 3
        return 1
    return 0
534
535 # Map opcodes that might be compressed to a function that returns the
536 # best potential encoding kind for the insn, per the numeric coding
537 # below.
# Each predicate takes (opcode, ops) and returns the insn kind:
# 0 uncompressed, 1 16-bit, 2 16-imm, 3 10-bit.
copcond = {
    'ori': maybenop,
    # 'attn': binutils won't ever print this
    'b': uncondbranch, 'bl': uncondbranch,
    'bc': bcops, 'bcl': bcops,
    'bclr': bclrops, 'bclrl': bclrops,
    # Stores and loads, including 16-imm ones
    'stdx': rstorex, 'stwx': rstorex,
    'std': rstorex, 'stw': rstorex,  # only offset zero
    'stfdx': frstorex, 'stfsx': frstorex,
    'stfd': frstorex, 'stfs': frstorex,  # only offset zero
    # Assuming lwz* rather than lwa*.
    'ldx': rloadx, 'lwzx': rloadx,
    'ld': rloadx, 'lwz': rloadx,  # only offset zero
    # Floating-point loads use the FP predicate, mirroring the stores:
    # rloadx only accepts 'r'/'imm' destinations, so it could never
    # accept a floating-point destination register.
    'lfdx': frloadx, 'lfsx': frloadx,
    'lfd': frloadx, 'lfs': frloadx,  # only offset zero
    'add': addop,
    'subf.': subfop, 'neg.': negop,
    # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
    # cmpw is cmp with L=0, 16-bit only.
    'cmp': cmpop,
    'sld.': sldop, 'srd.': srdop, 'srad.': sradop,
    'and': binlog1016ops, 'nand': binlog1016ops,
    'or': binlog1016ops, 'nor': binlog1016ops,
    # assuming popcnt and cntlz mean the *d opcodes.
    'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops,
    # not RT, RB is mapped to nand/nor RT, RB, RB.
    'xor': binlog16ops, 'eqv': binlog16ops,
    # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
    # assuming cnttz mean the *d opcode.
    'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops,
    'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops,
    'fneg.': unfp1016ops,
    'fdiv': binfp16ops,
    'fabs.': unfp16ops, 'fmr.': unfp16ops,
    # ??? are these the intended fp2int and int2fp, for all
    # combinations of signed/unsigned float/double?
    'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops,
    'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops,
    'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops,
    'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops,
    # Condition register opcodes.
    'mcrf': mcrfop,
    'crnor': crops,
    'crandc': crops,
    'crxor': crops,
    'crnand': crops,
    'crand': crops,
    'creqv': crops,
    'crorc': crops,
    'cror': crops,
    # System opcodes.
    # 'cbank' is not a ppc opcode, not handled
    'mtspr': mtsprops,  # raw opcode for 'mtlr', 'mtctr'
    'mfspr': mfsprops,  # raw opcode for 'mflr', 'mfctr'
    'mtcrf': mtcrfops,  # raw opcode for 'mtcr'
    'mfcr': mfcrops,
    # 16-imm opcodes.
    'sradi.': shiftops, 'srawi.': shiftops,
    'addi': addiops,
    'cmpi': cmpiops,  # raw opcode for 'cmpwi', 'cmpdi'
    # 'setvli', 'setmvli' are not ppc opcodes, not handled.
}
601
602 # We have 4 kinds of insns:
603
604 # 0: uncompressed; leave input insn unchanged
605 # 1: 16-bit compressed, only in compressed mode
606 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
607 # 3: 10-bit compressed, may switch to compressed mode
608
609 # count[0:3] count the occurrences of the base kinds.
610 # count[4] counts extra 10-bit nop-switches to compressed mode,
611 # tentatively introduced before insns that can be 16-bit encoded.
612 # count[5] counts extra 10-bit nop-switches to compressed mode,
613 # tentatively introduced before insns that can be 16-imm encoded.
614 # count[6] counts extra 16-bit nop-switches back to uncompressed,
615 # introduced after a 16-imm insn.
616 # count[7] counts pairs of 10-bit nop-switches and 16-imm insns
617 # that turned out to be followed by 32-bit insns. We assume
618 # a compressor would backtrack the pair into as a single 32-bit
619 # insn, so as to avoid a switch-back nop. The nop and 16-imm
620 # insns remain counted as such, so we count these occurrences
621 # here.
# Eight counters, all starting at zero.
count = [0] * 8
# Default trailing comment for each of the four insn kinds.
comments = ['', '\t; 16-bit', '\t; 16-imm', '\t; 10-bit']

# curi is the kind of the insn handled in the previous loop iteration,
# and previ the kind of the one before it.  The insn being handled now
# is held in nexti until it becomes curi at the very end of the loop.
curi = 0
previ = 0
631
# Read the disassembly line by line, echo every line, and annotate each
# insn with the encoding kind chosen for it while updating the counters.
for line in sys.stdin:
    if line.endswith('\n'):
        line = line[:-1]

    match = insn.fullmatch(line)
    if match is None:
        print(line)
        # Switch to uncompressed mode at function boundaries
        previ = curi = 0
        continue

    # addr is read as a global by mapop when it sizes branch offsets.
    addr = match['addr']
    opcode = match['opcode']
    operands = match['operands']

    if opcode in copcond:
        nexti = copcond[opcode](opcode,
                                [mapop(op) for op in operands.split(',')])
    else:
        nexti = 0

    comment = None

    # Insn kinds are compared by value; identity tests against int
    # literals only work by accident of interpreter caching.
    if curi == 0:
        if nexti == 0:
            pass  # Uncompressed mode for good.
        elif nexti == 1:
            # If curi was not a single uncompressed mode insn,
            # tentatively encode a 10-bit nop to enter compressed
            # mode, and then 16-bit.  It takes as much space as
            # encoding as 32-bit, but offers more possibilities for
            # subsequent compressed encodings.  A compressor proper
            # would have to go back and change the encoding
            # afterwards, but we're just counting.
            if previ != 1:
                print('\t\th.nop\t\t; 10-bit (tentative)')
                count[4] += 1
                comment = '16-bit (tentative)'
            else:
                comment = '16-bit auto-back'
        elif nexti == 2:
            # We can use compressed encoding for the 16-imm nexti
            # after an uncompressed insn without penalty if it's the
            # single-insn uncompressed mode slot.  For other
            # configurations, we can either remain in uncompressed
            # mode, or switch to compressed mode with a 10-bit nop.
            if previ != 1:
                print('\t\th.nop\t\t; 10-bit (tentative)')
                count[5] += 1
                comment = '16-imm (tentative), vs uncompressed'
            else:
                comment = '16-imm auto-back'
        elif nexti == 3:
            # If previ was 16-bit compressed, curi would be in the
            # single-insn uncompressed slot, so nexti could be encoded
            # as 16-bit, enabling another 1-insn uncompressed slot
            # after nexti that a 10-bit insn wouldn't, so make it so.
            if previ == 1:
                nexti = 1
                comment = '16-bit auto-back, vs 10-bit'
    elif curi == 1:
        # After a 16-bit insn, anything goes.  If it remains in 16-bit
        # mode, we can have 1 or 2 as nexti; if it returns to 32-bit
        # mode, we can have 0 or 3.  Using 1 instead of 3 makes room
        # for a subsequent single-insn compressed mode, so prefer
        # that.
        if nexti == 3:
            nexti = 1
            comment = '16-bit, vs 10-bit'
    elif curi == 2:
        # After a 16-imm insn, we can only switch back to uncompressed
        # mode with a 16-bit nop.
        if nexti == 0:
            if previ == 0:
                print('\t\t\t\t; backtracking pair above to 32-bit')
                count[7] += 1
                curi = 0
            else:
                print('\t\tc.nop\t\t; forced switch back to uncompressed mode')
                count[6] += 1
                previ = curi
                curi = 1
        elif nexti == 3:
            nexti = 1
    elif curi == 3:
        # After a 10-bit insn, another insn that could be encoded as
        # 10-bit might as well be encoded as 16-bit, to make room for
        # a single-insn uncompressed insn afterwards.
        if nexti == 3:
            nexti = 1
            comment = '16-bit, vs 10-bit'
    else:
        # Strings cannot be raised in Python 3; raise a real exception.
        raise ValueError("unknown mode for previious insn")

    count[nexti] += 1

    if comment is None:
        comment = comments[nexti]
    else:
        comment = '\t; ' + comment

    print(line + comment)

    previ = curi
    curi = nexti
737
# Size estimate: every mode-switching nop and every compressed insn
# takes 2 bytes, every uncompressed insn takes 4.
transition_bytes = 2 * (count[4] + count[5] + count[6])
compressed_bytes = 2 * (count[1] + count[2] + count[3])
uncompressed_bytes = 4 * count[0]
total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
# Each compressed insn occupied 4 bytes in the original code.
original_bytes = 2 * compressed_bytes + uncompressed_bytes

print()
print('Summary')
print('32-bit uncompressed instructions: %i' % count[0])
print('16-bit compressed instructions: %i' % count[1])
print('16-imm compressed-mode instructions: %i' % count[2])
print('10-bit compressed instructions: %i' % count[3])
print('10-bit mode-switching nops: %i' % count[4])
print('10-bit mode-switching nops for imm-16: %i' % count[5])
print('16-bit mode-switching nops after imm-16: %i' % count[6])
print('10-bit nop+16-imm pairs above, backtracked to 32-bit: %i' % count[7])
print('Compressed size estimate: %i' % total_bytes)
print('Original size: %i' % original_bytes)
# Input without any recognized insn leaves original_bytes at zero; report
# that instead of dividing by zero.
if original_bytes:
    print('Compressed/original ratio: %f' % (total_bytes / original_bytes))
else:
    print('Compressed/original ratio: n/a (no instructions found)')