comp16-v1-skel: first cut at the logic for attempt 1 encoding, untested
[libreriscv.git] / lxo / 532 / comp16-v1-skel.py
1 #! /bin/env python3
2
3 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
4
5 # It will look for insns that can be represented in compressed mode,
6 # according to the encoding rules in the copcond dictionary below.
7
8 # Nothing is assumed as to the actual bit-encoding of the insns, this
9 # is just to experiment with insn selection and get a quick feedback
10 # loop for the encoding options in compressed mode.
11
12 # In this script, the computations of encoding modes and transitions
13 # are those for attempt 1 encoding, that encompasses:
14
15 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
16 # mode or return to 32-bit mode;
17
18 # - 16-bit insns in compressed mode, each with 2 bits devoted to
19 # encoding one of the following possibilities:
20
21 # -- switch back to uncompressed mode at the next insn
22
23 # -- interpret the next insn in uncompressed mode, then return to
24 # compressed mode
25
26 # -- remain in 16-bit mode for the next insn
27
28 # -- take the 16 bits that would be the next compressed insn as an
29 # extension to the present 16-bit insn, and remain in 16-bit mode for
30 # the subsequent 16-bits
31
32 # At (visible) entry points, mode is forced to return to uncompressed
33 # mode.
34
35 # The entire code stream is printed, without any attempt to modify the
36 # addresses that go along with or in them; we only insert markers for
37 # the transition points, and for the compressed instructions.
38
39 # The really useful information is printed at the end: a summary of
40 # transition and compressed-insn counts, and the achieved compression
41 # rate.
42
43 import sys
44 import re
45
46 insn = re.compile('\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')
47
48 opkind = re.compile('(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))|(?P<immediate>-?[0-9]+)|(?P<branch>[0-9a-f]+)(?: <.*>)?|(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)')
49
50 def mapop(op):
51 match = opkind.fullmatch(op)
52
53 if match is None:
54 op = ('other', op)
55 elif match['reg'] is not None:
56 op = (match['regkind'], int(match['regnum']), op)
57 elif match['immediate'] is not None:
58 op = ('imm', int (op).bit_length (), op)
59 elif match['branch'] is not None:
60 op = ('pcoff', (int (match['branch'], 16)
61 - int (addr, 16)).bit_length (), op, addr)
62 elif match['offset'] is not None:
63 op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
64 else:
65 raise "unrecognized operand kind"
66
67 return op
68
69 def opclass(mop):
70 return mop[0]
71 def regno(mop):
72 if mop[0] in { 'r', 'fr', 'cr' }:
73 return mop[1]
74 else:
75 raise "operand is not a register"
76
77 def immbits(mop):
78 if mop[0] is 'imm':
79 return mop[1]
80 else:
81 raise "operand is not an immediate"
82
83 # Following are predicates to be used in copcond, to tell the mode in
84 # which opcode with ops as operands is to be represented
85
86 # Any occurrence of the opcode can be compressed.
87 def anyops(opcode, ops):
88 return 1
89
90 # Compress iff first and second operands are the same.
91 def same01(opcode, ops):
92 if ops[0] == ops[1]:
93 return 1
94 else:
95 return 0
96
97 # Registers representable in a made-up 3-bit mapping.
98 cregs2 = { 1, 2, 3, 4, 5, 6, 7, 31 }
99
100 # Return true iff mop is a regular register present in cregs2
101 def bin2regs3(mop):
102 return opclass(mop) is 'r' and regno(mop) in cregs2
103
104 # Return true iff mop is an immediate of at most 8 bits.
105 def imm8(mop):
106 return opclass(mop) is 'imm' and immbits(mop) <= 8
107
108 # Compress binary opcodes iff the first two operands (output and first
109 # input operand) are registers representable in 3 bits in compressed
110 # mode, and the immediate operand can be represented in 8 bits.
111 def bin2regs3imm8(opcode, ops):
112 if bin2regs3(ops[0]) and bin2regs3(ops[1]) and imm8(ops[2]):
113 return 1
114 else:
115 return 0
116
117 # Map opcodes that might be compressed to a function that returns the
118 # best potential encoding kind for the insn, per the numeric coding
119 # below.
120 copcond = {
121
122 }
123
124 # We have 4 kinds of insns:
125
126 # 0: uncompressed; leave input insn unchanged
127 # 1: 16-bit compressed, only in compressed mode
128 # 2: 16-bit extended by another 16-bit, only in compressed mode
129 # 3: 10-bit compressed, may switch to compressed mode
130
131 # count[0:3] count the occurrences of the base kinds.
132 # count[4] counts extra 10-bit nop-switches to compressed mode,
133 # tentatively introduced before insns that can be 16-bit encoded.
134 count = [0,0,0,0,0]
135 # Default comments for the insn kinds above. 2 is always tentative.
136 comments = ['', '\t; 16-bit', '\t; tentative 16+16-bit', '\t; 10-bit']
137
138 # cur stands for the insn kind that we read and processed in the
139 # previous iteration of the loop, and prev is the one before it. the
140 # one we're processing in the current iteration will be stored in
141 # next until we make it cur at the very end of the loop.
142 prev = cur = 0
143
144 for line in sys.stdin:
145 if line[-1] is '\n':
146 line = line[:-1]
147
148 match = insn.fullmatch(line)
149 if match is None:
150 print(line)
151 # Switch to uncompressed mode at function boundaries
152 prev = prev2 = 0
153 continue
154
155 addr = match['addr']
156 opcode = match['opcode']
157 operands = match['operands']
158
159 if opcode in copcond:
160 next = copcond[opcode](opcode,
161 [mapop(op) for op in operands.split(',')])
162 else:
163 next = 0
164
165 comment = None
166
167 if cur is 0:
168 if next is 0:
169 True # Uncompressed mode for good.
170 elif next is 1:
171 # If cur was not a single uncompressed mode insn,
172 # tentatively encode a 10-bit nop to enter compressed
173 # mode, and then 16-bit. It takes as much space as
174 # encoding as 32-bit, but offers more possibilities for
175 # subsequent compressed encodings. A compressor proper
176 # would have to go back and change the encoding
177 # afterwards, but wé re just counting.
178 if prev is not 1:
179 print('\t\th.nop\t\t; tentatively switch to compressed mode')
180 count[4] += 1
181 comment = 'tentatively compressed to 16-bit'
182 elif next is 2:
183 # We can use compressed encoding for next after an
184 # uncompressed insn only if it's the single-insn
185 # uncompressed mode slot. For anything else, we're better
186 # off using uncompressed encoding for next, since it makes
187 # no sense to spend a 10-bit nop to switch to compressed
188 # mode for a 16+16-bit insn. If subsequent insns would
189 # benefit from compressed encoding, we can switch then.
190 if prev is not 1:
191 next = 0
192 comment = 'not worth a nop for 16+16-bit'
193 elif next is 3:
194 # If prev was 16-bit compressed, cur would be in the
195 # single-insn uncompressed slot, so next could be encoded
196 # as 16-bit, enabling another 1-insn uncompressed slot
197 # after next that a 10-bit insn wouldn't, so make it so.
198 if prev is 1:
199 next = 1
200 comment = '16-bit, could be 10-bit'
201 elif cur is 1:
202 # After a 16-bit insn, anything goes. If it remains in 16-bit
203 # mode, we can have 1 or 2 as next; if it returns to 32-bit
204 # mode, we can have 0 or 3. Using 1 instead of 3 makes room
205 # for a subsequent single-insn compressed mode, so prefer
206 # that.
207 if next is 3:
208 next = 1
209 comment = '16-bit, could be 10-bit'
210 elif cur is 2:
211 # After a 16+16-bit insn, we can't switch directly to 32-bit
212 # mode. However, cur could have been encoded as 32-bit, since
213 # any 16+16-bit insn can. Indeed, we may have an arbitrary
214 # long sequence of 16+16-bit insns before next, and if next
215 # can only be encoded in 32-bit mode, we can "resolve" all
216 # previous adjacent 16+16-bit insns to the corresponding
217 # 32-bit insns in the encoding, and "adjust" the 16-bit or
218 # 10-bit insn that enabled the potential 16+16-bit encoding to
219 # switch to 32-bit mode then instead.
220 if next is 0:
221 prev = cur = 0
222 comment = '32-bit, like tentative 16+16-bit insns above'
223 elif cur is 3:
224 # After a 10-bit insn, another insn that could be encoded as
225 # 10-bit might as well be encoded as 16-bit, to make room for
226 # a single-insn uncompressed insn afterwards.
227 if next is 3:
228 next = 1
229 comment = '16-bit, could be 10-bit'
230 else:
231 raise "unknown mode for previous insn"
232
233 count[next] += 1
234
235 if comment is None:
236 comment = comments[next]
237 else:
238 comment = '\t; ' + comment
239
240 print(line + comment)
241
242 prev = cur
243 cur = next
244
245 transition_bytes = 2 * count[4]
246 compressed_bytes = 2 * (count[1] + count[3])
247 uncompressed_bytes = 4 * (count[0] + count[2])
248 total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
249 original_bytes = 2 * compressed_bytes + uncompressed_bytes
250
251 print()
252 print('Summary')
253 print('32-bit uncompressed instructions: %i' % count[0])
254 print('16-bit compressed instructions: %i' % count[1])
255 print('16+16-bit (tentative) compressed-mode instructions: %i' % count[2])
256 print('10-bit compressed instructions: %i' % count[3])
257 print('10-bit (tentative) mode-switching nops: %i' % count[4])
258 print('Compressed size estimate: %i' % total_bytes)
259 print('Original size: %i' % original_bytes)
260 print('Compressed/original ratio: %f' % (total_bytes / original_bytes))