TOOM-2 256x256->512-bit [un]signed*[un]signed mul works!
[bigint-presentation-code.git] / src / bigint_presentation_code / _tests / test_toom_cook.py
1 from contextlib import contextmanager
2 import unittest
3 from typing import Any, Callable, ContextManager, Iterator, Tuple
4
5 from bigint_presentation_code.compiler_ir import (GPR_SIZE_IN_BITS,
6 GPR_SIZE_IN_BYTES,
7 GPR_VALUE_MASK, BaseSimState,
8 Fn, GenAsmState, OpKind,
9 PostRASimState,
10 PreRASimState, SSAVal)
11 from bigint_presentation_code.register_allocator import allocate_registers
12 from bigint_presentation_code.toom_cook import (ToomCookInstance, ToomCookMul,
13 simple_mul)
14
15 _StateFactory = Callable[[], ContextManager[BaseSimState]]
16
17
def simple_umul(fn, lhs, rhs):
    # type: (Fn, SSAVal, SSAVal) -> tuple[SSAVal, None]
    """Append an unsigned-by-unsigned `simple_mul` of `lhs` and `rhs` to `fn`.

    Both signedness flags are passed as `False` and the ops are named
    with the "mul" prefix.

    Returns a `(product, None)` pair, i.e. the `(value, extra)` shape that
    `Mul` expects from its `mul` callback.
    """
    product = simple_mul(
        fn=fn,
        lhs=lhs, lhs_signed=False,
        rhs=rhs, rhs_signed=False,
        name="mul")
    return product, None
22
23
def get_pre_ra_state_factory(code):
    # type: (Mul) -> _StateFactory
    """Return a factory of context managers yielding `PreRASimState`s.

    `code` is not read here; the parameter keeps this function
    call-compatible with `get_post_ra_state_factory`.

    Each use of the returned factory creates a brand-new state with empty
    `ssa_vals` and `memory`, and keeps it installed via
    `set_as_current_debugging_state()` for the duration of the `with` body.
    """
    @contextmanager
    def state_factory():
        fresh_state = PreRASimState(ssa_vals={}, memory={})
        with fresh_state.set_as_current_debugging_state():
            yield fresh_state

    return state_factory
32
33
class Mul:
    """Builds an `Fn` that multiplies two multi-word values held in memory.

    All memory accesses are relative to the single `FuncArgR3` pointer
    argument (`ptr_in`). Byte offsets from that pointer are laid out as:

    * `dest_offset` (0): the product, `lhs + rhs` words long
    * `lhs_offset`: the left operand, directly after the product
    * `rhs_offset`: the right operand, directly after the left operand

    The multiplication itself is delegated to the `mul` callback, which is
    handed the `Fn` and the two loaded operands and must return a tuple
    whose first item is the product value; the whole tuple is kept in
    `retval`.
    """
    _MulFn = Callable[[Fn, SSAVal, SSAVal], Tuple[SSAVal, Any]]

    def __init__(self, mul, lhs_size_in_words, rhs_size_in_words):
        # type: (_MulFn, int, int) -> None
        super().__init__()
        fn = Fn()
        self.fn = fn

        # sizes, in words and in bytes
        dest_size_in_words = lhs_size_in_words + rhs_size_in_words
        self.lhs_size_in_words = lhs_size_in_words
        self.rhs_size_in_words = rhs_size_in_words
        self.dest_size_in_words = dest_size_in_words
        self.lhs_size_in_bytes = lhs_size_in_words * GPR_SIZE_IN_BYTES
        self.rhs_size_in_bytes = rhs_size_in_words * GPR_SIZE_IN_BYTES
        self.dest_size_in_bytes = dest_size_in_words * GPR_SIZE_IN_BYTES

        # byte offsets from ptr_in: dest first, then lhs, then rhs
        self.dest_offset = 0
        self.lhs_offset = self.dest_offset + self.dest_size_in_bytes
        self.rhs_offset = self.lhs_offset + self.lhs_size_in_bytes

        ptr_in = fn.append_new_op(
            kind=OpKind.FuncArgR3, name="ptr_in").outputs[0]
        self.ptr_in = ptr_in

        def append_load(setvl_name, load_name, offset, size_in_words):
            # append a SetVLI op followed by the SvLd op that uses it
            setvl = fn.append_new_op(
                kind=OpKind.SetVLI, immediates=[size_in_words],
                maxvl=size_in_words, name=setvl_name)
            load = fn.append_new_op(
                kind=OpKind.SvLd, immediates=[offset],
                input_vals=[ptr_in, setvl.outputs[0]],
                name=load_name, maxvl=size_in_words)
            return setvl, load

        self.lhs_setvl, self.load_lhs = append_load(
            "lhs_setvl", "load_lhs", self.lhs_offset, lhs_size_in_words)
        self.rhs_setvl, self.load_rhs = append_load(
            "rhs_setvl", "load_rhs", self.rhs_offset, rhs_size_in_words)

        # the callback appends the actual multiplication ops to fn
        self.retval = mul(
            fn, self.load_lhs.outputs[0], self.load_rhs.outputs[0])
        product = self.retval[0]

        self.dest_setvl = fn.append_new_op(
            kind=OpKind.SetVLI, immediates=[dest_size_in_words],
            maxvl=dest_size_in_words, name="dest_setvl")
        self.store = fn.append_new_op(
            kind=OpKind.SvStd,
            input_vals=[product, ptr_in, self.dest_setvl.outputs[0]],
            immediates=[self.dest_offset], maxvl=dest_size_in_words,
            name="store_dest")
77
78
def get_post_ra_state_factory(code):
    # type: (Mul) -> _StateFactory
    """Return a factory of context managers yielding `PostRASimState`s.

    `allocate_registers` is run on `code.fn` once, when this function is
    called; every state produced afterwards shares that one allocation
    map but starts with its own empty `memory` and `loc_values`.
    """
    allocation = allocate_registers(code.fn)

    @contextmanager
    def state_factory():
        post_ra_state = PostRASimState(
            ssa_val_to_loc_map=allocation, memory={}, loc_values={})
        yield post_ra_state

    return state_factory
89
90
91 class TestToomCook(unittest.TestCase):
92 maxDiff = None
93
def test_toom_2_repr(self):
    # Pins the exact repr() of the Toom-2 instance.
    # To regenerate the expected text after an intentional change, print
    # repr(repr(instance)) and paste the result below.
    instance = ToomCookInstance.make_toom_2()
    expected = (
        "ToomCookInstance(lhs_part_count=2, rhs_part_count=2, "
        "eval_points=(0, 1, POINT_AT_INFINITY), "
        "lhs_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)}))),"
        " rhs_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)}))),"
        " prod_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpSub(lhs="
        "EvalOpSub(lhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(-1, 1), 1: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({"
        "0: Fraction(-1, 1), 1: Fraction(1, 1), 2: Fraction(-1, 1)})), "
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)}))))"
    )
    self.assertEqual(repr(instance), expected)
131
def test_toom_2_5_repr(self):
    # Pins the exact repr() of the Toom-2.5 instance (3 lhs parts, 2 rhs
    # parts, as the expected text itself records).
    # To regenerate the expected text after an intentional change, print
    # repr(repr(instance)) and paste the result below.
    instance = ToomCookInstance.make_toom_2_5()
    expected = (
        "ToomCookInstance(lhs_part_count=3, rhs_part_count=2, "
        "eval_points=(0, 1, -1, POINT_AT_INFINITY), lhs_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpAdd(lhs="
        "EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs=EvalOpInput(lhs=2, rhs=0, "
        "poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), "
        "rhs=EvalOpInput(lhs=1, rhs=0, "
        "poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({"
        "0: Fraction(1, 1), 1: Fraction(1, 1), 2: Fraction(1, 1)})), "
        "EvalOpSub(lhs="
        "EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs=EvalOpInput(lhs=2, rhs=0, "
        "poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), "
        "rhs=EvalOpInput(lhs=1, rhs=0, "
        "poly=EvalOpPoly({1: Fraction(1, 1)})), poly=EvalOpPoly("
        "{0: Fraction(1, 1), 1: Fraction(-1, 1), 2: Fraction(1, 1)})), "
        "EvalOpInput(lhs=2, rhs=0, "
        "poly=EvalOpPoly({2: Fraction(1, 1)}))), rhs_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpAdd(lhs=EvalOpInput(lhs=0, rhs=0, "
        "poly=EvalOpPoly({0: Fraction(1, 1)})), rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
        "EvalOpSub(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs=EvalOpInput(lhs=1, rhs=0, "
        "poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(-1, 1)})), "
        "EvalOpInput(lhs=1, rhs=0, "
        "poly=EvalOpPoly({1: Fraction(1, 1)}))), "
        "prod_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpSub(lhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "rhs=EvalOpInput(lhs=2, rhs=0, "
        "poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(-1, 1)})), "
        "rhs=2, "
        "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(-1, 2)})), rhs="
        "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)})), "
        "poly=EvalOpPoly("
        "{1: Fraction(1, 2), 2: Fraction(-1, 2), 3: Fraction(-1, 1)})), "
        "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpAdd(lhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(1, 1)})), rhs=2, "
        "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(1, 2)})), rhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "poly=EvalOpPoly("
        "{0: Fraction(-1, 1), 1: Fraction(1, 2), 2: Fraction(1, 2)})), "
        "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)}))))"
    )
    self.assertEqual(repr(instance), expected)
196
def test_reversed_toom_2_5_repr(self):
    # Pins the exact repr() of the Toom-2.5 instance after .reversed();
    # the expected text shows the lhs/rhs part counts and eval ops swapped
    # relative to the un-reversed instance, with prod_eval_ops unchanged.
    # To regenerate the expected text after an intentional change, print
    # repr(repr(instance)) and paste the result below.
    instance = ToomCookInstance.make_toom_2_5().reversed()
    expected = (
        "ToomCookInstance(lhs_part_count=2, rhs_part_count=3, "
        "eval_points=(0, 1, -1, POINT_AT_INFINITY), lhs_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
        "EvalOpSub(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(-1, 1)})), "
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)}))),"
        " rhs_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpAdd(lhs=EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly("
        "{0: Fraction(1, 1), 1: Fraction(1, 1), 2: Fraction(1, 1)})), "
        "EvalOpSub(lhs=EvalOpAdd(lhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), rhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "poly=EvalOpPoly("
        "{0: Fraction(1, 1), 1: Fraction(-1, 1), 2: Fraction(1, 1)})), "
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)}))),"
        " prod_eval_ops=("
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpSub(lhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(-1, 1)})), "
        "rhs=2, "
        "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(-1, 2)})), rhs="
        "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)})), "
        "poly=EvalOpPoly("
        "{1: Fraction(1, 2), 2: Fraction(-1, 2), 3: Fraction(-1, 1)})), "
        "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpAdd(lhs="
        "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
        "rhs="
        "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
        "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(1, 1)})), rhs=2, "
        "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(1, 2)})), rhs="
        "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
        "poly=EvalOpPoly("
        "{0: Fraction(-1, 1), 1: Fraction(1, 2), 2: Fraction(1, 2)})), "
        "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)}))))"
    )
    self.assertEqual(repr(instance), expected)
258
def test_simple_mul_192x192_pre_ra_sim(self):
    # Runs the shared 192x192 simulation check against the simulator
    # states built by get_pre_ra_state_factory, once for every
    # (lhs_signed, rhs_signed) combination.
    signedness_combos = (
        (False, False), (False, True), (True, False), (True, True))
    for lhs_signed, rhs_signed in signedness_combos:
        self.tst_simple_mul_192x192_sim(
            lhs_signed=lhs_signed,
            rhs_signed=rhs_signed,
            get_state_factory=get_pre_ra_state_factory)
265
def test_simple_mul_192x192_post_ra_sim(self):
    # Runs the shared 192x192 simulation check against the simulator
    # states built by get_post_ra_state_factory, once for every
    # (lhs_signed, rhs_signed) combination.
    signedness_combos = (
        (False, False), (False, True), (True, False), (True, True))
    for lhs_signed, rhs_signed in signedness_combos:
        self.tst_simple_mul_192x192_sim(
            lhs_signed=lhs_signed,
            rhs_signed=rhs_signed,
            get_state_factory=get_post_ra_state_factory)
272
def tst_simple_mul_192x192_sim(
    self, lhs_signed,  # type: bool
    rhs_signed,  # type: bool
    get_state_factory,  # type: Callable[[Mul], _StateFactory]
):
    # Shared driver for the *_pre_ra_sim / *_post_ra_sim tests: builds a
    # 3-word by 3-word `simple_mul`, simulates it on states produced by
    # `get_state_factory`, and compares the 6-word result read back from
    # memory with the expected product.
    #
    # The operand magnitudes are chosen so the product is ASCII text:
    #     0x000191acb262e15b_4c6b5f2b19e1a53e_821a2342132c5b57
    #   * 0x4a37c0567bcbab53_cf1f597598194ae6_208a49071aeec507
    #  == 0x000074736574206e_6f69746163696c70
    #       _69746c756d207469_622d3438333e2d32
    #       _3931783239312079_7261727469627261
    #  == int.from_bytes(
    #         b"arbitrary 192x192->384-bit multiplication test", 'little')
    lhs_magnitude = 0x000191acb262e15b_4c6b5f2b19e1a53e_821a2342132c5b57
    rhs_magnitude = 0x4a37c0567bcbab53_cf1f597598194ae6_208a49071aeec507
    magnitude_product = int.from_bytes(
        b"arbitrary 192x192->384-bit multiplication test", 'little')
    # sanity-check the test data itself before involving the simulator
    self.assertEqual(lhs_magnitude * rhs_magnitude, magnitude_product)

    def mul_fn(fn, lhs, rhs):
        # simple_mul with the signedness under test, in Mul's callback shape
        product = simple_mul(
            fn=fn, lhs=lhs, lhs_signed=lhs_signed,
            rhs=rhs, rhs_signed=rhs_signed, name="mul")
        return product, None

    code = Mul(mul=mul_fn, lhs_size_in_words=3, rhs_size_in_words=3)
    state_factory = get_state_factory(code)
    base_addr = 0x100
    dest_addr = base_addr + code.dest_offset
    lhs_addr = base_addr + code.lhs_offset
    rhs_addr = base_addr + code.rhs_offset

    def store_words(state, addr, value):
        # write `value` as three GPR-sized words, least-significant first
        for word_idx in range(3):
            word = (value >> GPR_SIZE_IN_BITS * word_idx) & GPR_VALUE_MASK
            state.store(addr + word_idx * GPR_SIZE_IN_BYTES, word)

    sign_cases = (
        (False, False), (False, True), (True, False), (True, True))
    for lhs_neg, rhs_neg in sign_cases:
        if (lhs_neg and not lhs_signed) or (rhs_neg and not rhs_signed):
            # negative inputs are only tried on operands treated as signed
            continue
        with self.subTest(lhs_signed=lhs_signed, rhs_signed=rhs_signed,
                          lhs_neg=lhs_neg, rhs_neg=rhs_neg), \
                state_factory() as state:
            # the trailing comma of the original is spelled out here:
            # the value assigned is a 1-tuple
            state[code.ptr_in] = (base_addr,)

            # negative operands are stored negated modulo 2 ** 192
            if lhs_neg:
                lhs = 2 ** 192 - lhs_magnitude
            else:
                lhs = lhs_magnitude
            if rhs_neg:
                rhs = 2 ** 192 - rhs_magnitude
            else:
                rhs = rhs_magnitude
            store_words(state, lhs_addr, lhs)
            store_words(state, rhs_addr, rhs)

            code.fn.sim(state)

            # the product is negative exactly when the operand signs
            # differ; it is then expected negated modulo 2 ** 384
            if lhs_neg != rhs_neg:
                expected = 2 ** 384 - magnitude_product
            else:
                expected = magnitude_product
            prod = sum(
                state.load(dest_addr + GPR_SIZE_IN_BYTES * word_idx)
                << (GPR_SIZE_IN_BITS * word_idx)
                for word_idx in range(6))
            # compare as hex so a failure message is readable
            self.assertEqual(hex(prod), hex(expected))
334
def test_simple_mul_192x192_ops(self):
    # Pins the textual dump (Fn.ops_to_str()) of the ops generated for an
    # unsigned 3-word by 3-word simple_mul wrapped in Mul.
    # NOTE(review): the expected text below is reproduced exactly as it
    # appears in this file; confirm that the leading spaces inside the
    # literals match what ops_to_str() actually emits.
    code = Mul(mul=simple_umul, lhs_size_in_words=3, rhs_size_in_words=3)
    expected_ops = (
        "ptr_in:\n"
        " (<...outputs[0]: <I64>>) <= FuncArgR3\n"
        "lhs_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
        "load_lhs:\n"
        " (<...outputs[0]: <I64*3>>) <= SvLd(\n"
        " <ptr_in.outputs[0]: <I64>>,\n"
        " <lhs_setvl.outputs[0]: <VL_MAXVL>>, 0x30)\n"
        "rhs_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
        "load_rhs:\n"
        " (<...outputs[0]: <I64*3>>) <= SvLd(\n"
        " <ptr_in.outputs[0]: <I64>>,\n"
        " <rhs_setvl.outputs[0]: <VL_MAXVL>>, 0x48)\n"
        "mul_rhs_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
        "mul_rhs_spread:\n"
        " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
        " <...outputs[2]: <I64>>) <= Spread(\n"
        " <load_rhs.outputs[0]: <I64*3>>,\n"
        " <mul_rhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_zero:\n"
        " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
        "mul_lhs_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
        "mul_zero2:\n"
        " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
        "mul_0_mul:\n"
        " (<...outputs[0]: <I64*3>>, <...outputs[1]: <I64>>\n"
        " ) <= SvMAddEDU(<load_lhs.outputs[0]: <I64*3>>,\n"
        " <mul_rhs_spread.outputs[0]: <I64>>,\n"
        " <mul_zero.outputs[0]: <I64>>,\n"
        " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_0_mul_rt_spread:\n"
        " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
        " <...outputs[2]: <I64>>) <= Spread(\n"
        " <mul_0_mul.outputs[0]: <I64*3>>,\n"
        " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_1_mul:\n"
        " (<...outputs[0]: <I64*3>>, <...outputs[1]: <I64>>\n"
        " ) <= SvMAddEDU(<load_lhs.outputs[0]: <I64*3>>,\n"
        " <mul_rhs_spread.outputs[1]: <I64>>,\n"
        " <mul_zero.outputs[0]: <I64>>,\n"
        " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_1_mul_rt_spread:\n"
        " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
        " <...outputs[2]: <I64>>) <= Spread(\n"
        " <mul_1_mul.outputs[0]: <I64*3>>,\n"
        " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_1_cast_retval_zero:\n"
        " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
        "mul_1_cast_pp_zero:\n"
        " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
        "mul_1_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x5)\n"
        "mul_1_retval_concat:\n"
        " (<...outputs[0]: <I64*5>>) <= Concat(\n"
        " <mul_0_mul_rt_spread.outputs[1]: <I64>>,\n"
        " <mul_0_mul_rt_spread.outputs[2]: <I64>>,\n"
        " <mul_0_mul.outputs[1]: <I64>>,\n"
        " <mul_1_cast_retval_zero.outputs[0]: <I64>>,\n"
        " <mul_1_cast_retval_zero.outputs[0]: <I64>>,\n"
        " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_1_pp_concat:\n"
        " (<...outputs[0]: <I64*5>>) <= Concat(\n"
        " <mul_1_mul_rt_spread.outputs[0]: <I64>>,\n"
        " <mul_1_mul_rt_spread.outputs[1]: <I64>>,\n"
        " <mul_1_mul_rt_spread.outputs[2]: <I64>>,\n"
        " <mul_1_mul.outputs[1]: <I64>>,\n"
        " <mul_1_cast_pp_zero.outputs[0]: <I64>>,\n"
        " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_1_clear_ca:\n"
        " (<...outputs[0]: <CA>>) <= ClearCA\n"
        "mul_1_add:\n"
        " (<...outputs[0]: <I64*5>>, <...outputs[1]: <CA>>\n"
        " ) <= SvAddE(<mul_1_retval_concat.outputs[0]: <I64*5>>,\n"
        " <mul_1_pp_concat.outputs[0]: <I64*5>>,\n"
        " <mul_1_clear_ca.outputs[0]: <CA>>,\n"
        " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_1_sum_spread:\n"
        " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
        " <...outputs[2]: <I64>>, <...outputs[3]: <I64>>,\n"
        " <...outputs[4]: <I64>>) <= Spread(\n"
        " <mul_1_add.outputs[0]: <I64*5>>,\n"
        " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_2_mul:\n"
        " (<...outputs[0]: <I64*3>>, <...outputs[1]: <I64>>\n"
        " ) <= SvMAddEDU(<load_lhs.outputs[0]: <I64*3>>,\n"
        " <mul_rhs_spread.outputs[2]: <I64>>,\n"
        " <mul_zero.outputs[0]: <I64>>,\n"
        " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_2_mul_rt_spread:\n"
        " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
        " <...outputs[2]: <I64>>) <= Spread(\n"
        " <mul_2_mul.outputs[0]: <I64*3>>,\n"
        " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_2_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x4)\n"
        "mul_2_retval_concat:\n"
        " (<...outputs[0]: <I64*4>>) <= Concat(\n"
        " <mul_1_sum_spread.outputs[1]: <I64>>,\n"
        " <mul_1_sum_spread.outputs[2]: <I64>>,\n"
        " <mul_1_sum_spread.outputs[3]: <I64>>,\n"
        " <mul_1_sum_spread.outputs[4]: <I64>>,\n"
        " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_2_pp_concat:\n"
        " (<...outputs[0]: <I64*4>>) <= Concat(\n"
        " <mul_2_mul_rt_spread.outputs[0]: <I64>>,\n"
        " <mul_2_mul_rt_spread.outputs[1]: <I64>>,\n"
        " <mul_2_mul_rt_spread.outputs[2]: <I64>>,\n"
        " <mul_2_mul.outputs[1]: <I64>>,\n"
        " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_2_clear_ca:\n"
        " (<...outputs[0]: <CA>>) <= ClearCA\n"
        "mul_2_add:\n"
        " (<...outputs[0]: <I64*4>>, <...outputs[1]: <CA>>\n"
        " ) <= SvAddE(<mul_2_retval_concat.outputs[0]: <I64*4>>,\n"
        " <mul_2_pp_concat.outputs[0]: <I64*4>>,\n"
        " <mul_2_clear_ca.outputs[0]: <CA>>,\n"
        " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_2_sum_spread:\n"
        " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
        " <...outputs[2]: <I64>>, <...outputs[3]: <I64>>) <= Spread(\n"
        " <mul_2_add.outputs[0]: <I64*4>>,\n"
        " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "mul_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x6)\n"
        "mul_concat:\n"
        " (<...outputs[0]: <I64*6>>) <= Concat(\n"
        " <mul_0_mul_rt_spread.outputs[0]: <I64>>,\n"
        " <mul_1_sum_spread.outputs[0]: <I64>>,\n"
        " <mul_2_sum_spread.outputs[0]: <I64>>,\n"
        " <mul_2_sum_spread.outputs[1]: <I64>>,\n"
        " <mul_2_sum_spread.outputs[2]: <I64>>,\n"
        " <mul_2_sum_spread.outputs[3]: <I64>>,\n"
        " <mul_setvl.outputs[0]: <VL_MAXVL>>)\n"
        "dest_setvl:\n"
        " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x6)\n"
        "store_dest:\n"
        " SvStd(<mul_concat.outputs[0]: <I64*6>>,\n"
        " <ptr_in.outputs[0]: <I64>>,\n"
        " <dest_setvl.outputs[0]: <VL_MAXVL>>, 0x0)"
    )
    self.assertEqual(code.fn.ops_to_str(), expected_ops)
483
484 def test_simple_mul_192x192_reg_alloc(self):
485 code = Mul(mul=simple_umul, lhs_size_in_words=3, rhs_size_in_words=3)
486 fn = code.fn
487 assigned_registers = allocate_registers(fn)
488 self.assertEqual(
489 repr(assigned_registers), "{"
490 "<store_dest.inp2.setvl.outputs[0]: <VL_MAXVL>>: "
491 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
492 "<store_dest.inp1.copy.outputs[0]: <I64>>: "
493 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
494 "<store_dest.inp0.copy.outputs[0]: <I64*6>>: "
495 "Loc(kind=LocKind.GPR, start=4, reg_len=6), "
496 "<store_dest.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
497 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
498 "<dest_setvl.outputs[0]: <VL_MAXVL>>: "
499 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
500 "<mul_concat.out0.copy.outputs[0]: <I64*6>>: "
501 "Loc(kind=LocKind.GPR, start=3, reg_len=6), "
502 "<mul_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
503 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
504 "<mul_concat.outputs[0]: <I64*6>>: "
505 "Loc(kind=LocKind.GPR, start=3, reg_len=6), "
506 "<mul_concat.inp0.copy.outputs[0]: <I64>>: "
507 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
508 "<mul_concat.inp1.copy.outputs[0]: <I64>>: "
509 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
510 "<mul_concat.inp2.copy.outputs[0]: <I64>>: "
511 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
512 "<mul_concat.inp3.copy.outputs[0]: <I64>>: "
513 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
514 "<mul_concat.inp4.copy.outputs[0]: <I64>>: "
515 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
516 "<mul_concat.inp5.copy.outputs[0]: <I64>>: "
517 "Loc(kind=LocKind.GPR, start=8, reg_len=1), "
518 "<mul_concat.inp6.setvl.outputs[0]: <VL_MAXVL>>: "
519 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
520 "<mul_setvl.outputs[0]: <VL_MAXVL>>: "
521 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
522 "<mul_2_sum_spread.out3.copy.outputs[0]: <I64>>: "
523 "Loc(kind=LocKind.GPR, start=9, reg_len=1), "
524 "<mul_2_sum_spread.out2.copy.outputs[0]: <I64>>: "
525 "Loc(kind=LocKind.GPR, start=10, reg_len=1), "
526 "<mul_2_sum_spread.out1.copy.outputs[0]: <I64>>: "
527 "Loc(kind=LocKind.GPR, start=11, reg_len=1), "
528 "<mul_2_sum_spread.out0.copy.outputs[0]: <I64>>: "
529 "Loc(kind=LocKind.GPR, start=12, reg_len=1), "
530 "<mul_2_sum_spread.outputs[0]: <I64>>: "
531 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
532 "<mul_2_sum_spread.outputs[1]: <I64>>: "
533 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
534 "<mul_2_sum_spread.outputs[2]: <I64>>: "
535 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
536 "<mul_2_sum_spread.outputs[3]: <I64>>: "
537 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
538 "<mul_2_sum_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
539 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
540 "<mul_2_sum_spread.inp0.copy.outputs[0]: <I64*4>>: "
541 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
542 "<mul_2_sum_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
543 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
544 "<mul_2_add.out0.copy.outputs[0]: <I64*4>>: "
545 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
546 "<mul_2_add.out0.setvl.outputs[0]: <VL_MAXVL>>: "
547 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
548 "<mul_2_clear_ca.outputs[0]: <CA>>: "
549 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
550 "<mul_2_add.outputs[1]: <CA>>: "
551 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
552 "<mul_2_add.outputs[0]: <I64*4>>: "
553 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
554 "<mul_2_add.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
555 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
556 "<mul_2_add.inp1.copy.outputs[0]: <I64*4>>: "
557 "Loc(kind=LocKind.GPR, start=7, reg_len=4), "
558 "<mul_2_add.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
559 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
560 "<mul_2_add.inp0.copy.outputs[0]: <I64*4>>: "
561 "Loc(kind=LocKind.GPR, start=14, reg_len=4), "
562 "<mul_2_add.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
563 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
564 "<mul_2_pp_concat.out0.copy.outputs[0]: <I64*4>>: "
565 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
566 "<mul_2_pp_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
567 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
568 "<mul_2_pp_concat.outputs[0]: <I64*4>>: "
569 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
570 "<mul_2_pp_concat.inp0.copy.outputs[0]: <I64>>: "
571 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
572 "<mul_2_pp_concat.inp1.copy.outputs[0]: <I64>>: "
573 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
574 "<mul_2_pp_concat.inp2.copy.outputs[0]: <I64>>: "
575 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
576 "<mul_2_pp_concat.inp3.copy.outputs[0]: <I64>>: "
577 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
578 "<mul_2_pp_concat.inp4.setvl.outputs[0]: <VL_MAXVL>>: "
579 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
580 "<mul_2_retval_concat.out0.copy.outputs[0]: <I64*4>>: "
581 "Loc(kind=LocKind.GPR, start=7, reg_len=4), "
582 "<mul_2_retval_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
583 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
584 "<mul_2_retval_concat.outputs[0]: <I64*4>>: "
585 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
586 "<mul_2_retval_concat.inp0.copy.outputs[0]: <I64>>: "
587 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
588 "<mul_2_retval_concat.inp1.copy.outputs[0]: <I64>>: "
589 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
590 "<mul_2_retval_concat.inp2.copy.outputs[0]: <I64>>: "
591 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
592 "<mul_2_retval_concat.inp3.copy.outputs[0]: <I64>>: "
593 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
594 "<mul_2_retval_concat.inp4.setvl.outputs[0]: <VL_MAXVL>>: "
595 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
596 "<mul_2_setvl.outputs[0]: <VL_MAXVL>>: "
597 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
598 "<mul_2_mul_rt_spread.out2.copy.outputs[0]: <I64>>: "
599 "Loc(kind=LocKind.GPR, start=11, reg_len=1), "
600 "<mul_2_mul_rt_spread.out1.copy.outputs[0]: <I64>>: "
601 "Loc(kind=LocKind.GPR, start=12, reg_len=1), "
602 "<mul_2_mul_rt_spread.out0.copy.outputs[0]: <I64>>: "
603 "Loc(kind=LocKind.GPR, start=14, reg_len=1), "
604 "<mul_2_mul_rt_spread.outputs[0]: <I64>>: "
605 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
606 "<mul_2_mul_rt_spread.outputs[1]: <I64>>: "
607 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
608 "<mul_2_mul_rt_spread.outputs[2]: <I64>>: "
609 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
610 "<mul_2_mul_rt_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
611 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
612 "<mul_2_mul_rt_spread.inp0.copy.outputs[0]: <I64*3>>: "
613 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
614 "<mul_2_mul_rt_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
615 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
616 "<mul_2_mul.out1.copy.outputs[0]: <I64>>: "
617 "Loc(kind=LocKind.GPR, start=15, reg_len=1), "
618 "<mul_2_mul.out0.copy.outputs[0]: <I64*3>>: "
619 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
620 "<mul_2_mul.out0.setvl.outputs[0]: <VL_MAXVL>>: "
621 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
622 "<mul_2_mul.inp2.copy.outputs[0]: <I64>>: "
623 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
624 "<mul_2_mul.outputs[1]: <I64>>: "
625 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
626 "<mul_2_mul.outputs[0]: <I64*3>>: "
627 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
628 "<mul_2_mul.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
629 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
630 "<mul_2_mul.inp1.copy.outputs[0]: <I64>>: "
631 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
632 "<mul_2_mul.inp0.copy.outputs[0]: <I64*3>>: "
633 "Loc(kind=LocKind.GPR, start=8, reg_len=3), "
634 "<mul_2_mul.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
635 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
636 "<mul_1_sum_spread.out4.copy.outputs[0]: <I64>>: "
637 "Loc(kind=LocKind.GPR, start=16, reg_len=1), "
638 "<mul_1_sum_spread.out3.copy.outputs[0]: <I64>>: "
639 "Loc(kind=LocKind.GPR, start=17, reg_len=1), "
640 "<mul_1_sum_spread.out2.copy.outputs[0]: <I64>>: "
641 "Loc(kind=LocKind.GPR, start=18, reg_len=1), "
642 "<mul_1_sum_spread.out1.copy.outputs[0]: <I64>>: "
643 "Loc(kind=LocKind.GPR, start=19, reg_len=1), "
644 "<mul_1_sum_spread.out0.copy.outputs[0]: <I64>>: "
645 "Loc(kind=LocKind.GPR, start=20, reg_len=1), "
646 "<mul_1_sum_spread.outputs[0]: <I64>>: "
647 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
648 "<mul_1_sum_spread.outputs[1]: <I64>>: "
649 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
650 "<mul_1_sum_spread.outputs[2]: <I64>>: "
651 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
652 "<mul_1_sum_spread.outputs[3]: <I64>>: "
653 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
654 "<mul_1_sum_spread.outputs[4]: <I64>>: "
655 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
656 "<mul_1_sum_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
657 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
658 "<mul_1_sum_spread.inp0.copy.outputs[0]: <I64*5>>: "
659 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
660 "<mul_1_sum_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
661 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
662 "<mul_1_add.out0.copy.outputs[0]: <I64*5>>: "
663 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
664 "<mul_1_add.out0.setvl.outputs[0]: <VL_MAXVL>>: "
665 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
666 "<mul_1_clear_ca.outputs[0]: <CA>>: "
667 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
668 "<mul_1_add.outputs[1]: <CA>>: "
669 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
670 "<mul_1_add.outputs[0]: <I64*5>>: "
671 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
672 "<mul_1_add.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
673 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
674 "<mul_1_add.inp1.copy.outputs[0]: <I64*5>>: "
675 "Loc(kind=LocKind.GPR, start=8, reg_len=5), "
676 "<mul_1_add.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
677 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
678 "<mul_1_add.inp0.copy.outputs[0]: <I64*5>>: "
679 "Loc(kind=LocKind.GPR, start=14, reg_len=5), "
680 "<mul_1_add.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
681 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
682 "<mul_1_pp_concat.out0.copy.outputs[0]: <I64*5>>: "
683 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
684 "<mul_1_pp_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
685 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
686 "<mul_1_pp_concat.outputs[0]: <I64*5>>: "
687 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
688 "<mul_1_pp_concat.inp0.copy.outputs[0]: <I64>>: "
689 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
690 "<mul_1_pp_concat.inp1.copy.outputs[0]: <I64>>: "
691 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
692 "<mul_1_pp_concat.inp2.copy.outputs[0]: <I64>>: "
693 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
694 "<mul_1_pp_concat.inp3.copy.outputs[0]: <I64>>: "
695 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
696 "<mul_1_pp_concat.inp4.copy.outputs[0]: <I64>>: "
697 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
698 "<mul_1_pp_concat.inp5.setvl.outputs[0]: <VL_MAXVL>>: "
699 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
700 "<mul_1_retval_concat.out0.copy.outputs[0]: <I64*5>>: "
701 "Loc(kind=LocKind.GPR, start=8, reg_len=5), "
702 "<mul_1_retval_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
703 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
704 "<mul_1_retval_concat.outputs[0]: <I64*5>>: "
705 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
706 "<mul_1_retval_concat.inp0.copy.outputs[0]: <I64>>: "
707 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
708 "<mul_1_retval_concat.inp1.copy.outputs[0]: <I64>>: "
709 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
710 "<mul_1_retval_concat.inp2.copy.outputs[0]: <I64>>: "
711 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
712 "<mul_1_retval_concat.inp3.copy.outputs[0]: <I64>>: "
713 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
714 "<mul_1_retval_concat.inp4.copy.outputs[0]: <I64>>: "
715 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
716 "<mul_1_retval_concat.inp5.setvl.outputs[0]: <VL_MAXVL>>: "
717 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
718 "<mul_1_setvl.outputs[0]: <VL_MAXVL>>: "
719 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
720 "<mul_1_cast_pp_zero.out0.copy.outputs[0]: <I64>>: "
721 "Loc(kind=LocKind.GPR, start=14, reg_len=1), "
722 "<mul_1_cast_pp_zero.outputs[0]: <I64>>: "
723 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
724 "<mul_1_cast_retval_zero.out0.copy.outputs[0]: <I64>>: "
725 "Loc(kind=LocKind.GPR, start=8, reg_len=1), "
726 "<mul_1_cast_retval_zero.outputs[0]: <I64>>: "
727 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
728 "<mul_1_mul_rt_spread.out2.copy.outputs[0]: <I64>>: "
729 "Loc(kind=LocKind.GPR, start=15, reg_len=1), "
730 "<mul_1_mul_rt_spread.out1.copy.outputs[0]: <I64>>: "
731 "Loc(kind=LocKind.GPR, start=16, reg_len=1), "
732 "<mul_1_mul_rt_spread.out0.copy.outputs[0]: <I64>>: "
733 "Loc(kind=LocKind.GPR, start=17, reg_len=1), "
734 "<mul_1_mul_rt_spread.outputs[0]: <I64>>: "
735 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
736 "<mul_1_mul_rt_spread.outputs[1]: <I64>>: "
737 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
738 "<mul_1_mul_rt_spread.outputs[2]: <I64>>: "
739 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
740 "<mul_1_mul_rt_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
741 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
742 "<mul_1_mul_rt_spread.inp0.copy.outputs[0]: <I64*3>>: "
743 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
744 "<mul_1_mul_rt_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
745 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
746 "<mul_1_mul.out1.copy.outputs[0]: <I64>>: "
747 "Loc(kind=LocKind.GPR, start=18, reg_len=1), "
748 "<mul_1_mul.out0.copy.outputs[0]: <I64*3>>: "
749 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
750 "<mul_1_mul.out0.setvl.outputs[0]: <VL_MAXVL>>: "
751 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
752 "<mul_1_mul.inp2.copy.outputs[0]: <I64>>: "
753 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
754 "<mul_1_mul.outputs[1]: <I64>>: "
755 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
756 "<mul_1_mul.outputs[0]: <I64*3>>: "
757 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
758 "<mul_1_mul.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
759 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
760 "<mul_1_mul.inp1.copy.outputs[0]: <I64>>: "
761 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
762 "<mul_1_mul.inp0.copy.outputs[0]: <I64*3>>: "
763 "Loc(kind=LocKind.GPR, start=8, reg_len=3), "
764 "<mul_1_mul.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
765 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
766 "<mul_0_mul_rt_spread.out2.copy.outputs[0]: <I64>>: "
767 "Loc(kind=LocKind.GPR, start=11, reg_len=1), "
768 "<mul_0_mul_rt_spread.out1.copy.outputs[0]: <I64>>: "
769 "Loc(kind=LocKind.GPR, start=12, reg_len=1), "
770 "<mul_0_mul_rt_spread.out0.copy.outputs[0]: <I64>>: "
771 "Loc(kind=LocKind.GPR, start=21, reg_len=1), "
772 "<mul_0_mul_rt_spread.outputs[0]: <I64>>: "
773 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
774 "<mul_0_mul_rt_spread.outputs[1]: <I64>>: "
775 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
776 "<mul_0_mul_rt_spread.outputs[2]: <I64>>: "
777 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
778 "<mul_0_mul_rt_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
779 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
780 "<mul_0_mul_rt_spread.inp0.copy.outputs[0]: <I64*3>>: "
781 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
782 "<mul_0_mul_rt_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
783 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
784 "<mul_0_mul.out1.copy.outputs[0]: <I64>>: "
785 "Loc(kind=LocKind.GPR, start=19, reg_len=1), "
786 "<mul_0_mul.out0.copy.outputs[0]: <I64*3>>: "
787 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
788 "<mul_0_mul.out0.setvl.outputs[0]: <VL_MAXVL>>: "
789 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
790 "<mul_0_mul.inp2.copy.outputs[0]: <I64>>: "
791 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
792 "<mul_0_mul.outputs[1]: <I64>>: "
793 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
794 "<mul_0_mul.outputs[0]: <I64*3>>: "
795 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
796 "<mul_0_mul.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
797 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
798 "<mul_0_mul.inp1.copy.outputs[0]: <I64>>: "
799 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
800 "<mul_0_mul.inp0.copy.outputs[0]: <I64*3>>: "
801 "Loc(kind=LocKind.GPR, start=8, reg_len=3), "
802 "<mul_0_mul.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
803 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
804 "<mul_zero2.out0.copy.outputs[0]: <I64>>: "
805 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
806 "<mul_zero2.outputs[0]: <I64>>: "
807 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
808 "<mul_lhs_setvl.outputs[0]: <VL_MAXVL>>: "
809 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
810 "<mul_zero.out0.copy.outputs[0]: <I64>>: "
811 "Loc(kind=LocKind.GPR, start=22, reg_len=1), "
812 "<mul_zero.outputs[0]: <I64>>: "
813 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
814 "<mul_rhs_spread.out2.copy.outputs[0]: <I64>>: "
815 "Loc(kind=LocKind.GPR, start=23, reg_len=1), "
816 "<mul_rhs_spread.out1.copy.outputs[0]: <I64>>: "
817 "Loc(kind=LocKind.GPR, start=14, reg_len=1), "
818 "<mul_rhs_spread.out0.copy.outputs[0]: <I64>>: "
819 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
820 "<mul_rhs_spread.outputs[0]: <I64>>: "
821 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
822 "<mul_rhs_spread.outputs[1]: <I64>>: "
823 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
824 "<mul_rhs_spread.outputs[2]: <I64>>: "
825 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
826 "<mul_rhs_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
827 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
828 "<mul_rhs_spread.inp0.copy.outputs[0]: <I64*3>>: "
829 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
830 "<mul_rhs_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
831 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
832 "<mul_rhs_setvl.outputs[0]: <VL_MAXVL>>: "
833 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
834 "<load_rhs.out0.copy.outputs[0]: <I64*3>>: "
835 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
836 "<load_rhs.out0.setvl.outputs[0]: <VL_MAXVL>>: "
837 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
838 "<load_rhs.outputs[0]: <I64*3>>: "
839 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
840 "<load_rhs.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
841 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
842 "<load_rhs.inp0.copy.outputs[0]: <I64>>: "
843 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
844 "<rhs_setvl.outputs[0]: <VL_MAXVL>>: "
845 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
846 "<load_lhs.out0.copy.outputs[0]: <I64*3>>: "
847 "Loc(kind=LocKind.GPR, start=24, reg_len=3), "
848 "<load_lhs.out0.setvl.outputs[0]: <VL_MAXVL>>: "
849 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
850 "<load_lhs.outputs[0]: <I64*3>>: "
851 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
852 "<load_lhs.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
853 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
854 "<load_lhs.inp0.copy.outputs[0]: <I64>>: "
855 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
856 "<lhs_setvl.outputs[0]: <VL_MAXVL>>: "
857 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
858 "<ptr_in.out0.copy.outputs[0]: <I64>>: "
859 "Loc(kind=LocKind.GPR, start=27, reg_len=1), "
860 "<ptr_in.outputs[0]: <I64>>: "
861 "Loc(kind=LocKind.GPR, start=3, reg_len=1)"
862 "}")
863
def test_simple_mul_192x192_asm(self):
    # Golden-output test: build the 3-word x 3-word unsigned `simple_mul`
    # function, run the register allocator on it, generate assembly, and
    # compare the emitted instruction list against the exact expected
    # sequence below (order and register numbers both matter).
    mul_code = Mul(mul=simple_umul, lhs_size_in_words=3, rhs_size_in_words=3)
    mul_fn = mul_code.fn
    asm_state = GenAsmState(allocate_registers(mul_fn))
    mul_fn.gen_asm(asm_state)
    expected_asm = [
        'or 27, 3, 3',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 6, 27, 27',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.ld *3, 48(6)',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.or *24, *3, *3',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 6, 27, 27',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.ld *3, 72(6)',
        'setvl 0, 0, 3, 0, 1, 1',
        'setvl 0, 0, 3, 0, 1, 1',
        'setvl 0, 0, 3, 0, 1, 1',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.or/mrr *5, *3, *3',
        'or 4, 5, 5',
        'or 14, 6, 6',
        'or 23, 7, 7',
        'addi 3, 0, 0',
        'or 22, 3, 3',
        'setvl 0, 0, 3, 0, 1, 1',
        'addi 3, 0, 0',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.or *8, *24, *24',
        'or 7, 4, 4',
        'or 6, 22, 22',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.maddedu *3, *8, 7, 6',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 19, 6, 6',
        'setvl 0, 0, 3, 0, 1, 1',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 21, 3, 3',
        'or 12, 4, 4',
        'or 11, 5, 5',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.or *8, *24, *24',
        'or 7, 14, 14',
        'or 6, 22, 22',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.maddedu *3, *8, 7, 6',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 18, 6, 6',
        'setvl 0, 0, 3, 0, 1, 1',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 17, 3, 3',
        'or 16, 4, 4',
        'or 15, 5, 5',
        'addi 3, 0, 0',
        'or 8, 3, 3',
        'addi 3, 0, 0',
        'or 14, 3, 3',
        'setvl 0, 0, 5, 0, 1, 1',
        'or 3, 12, 12',
        'or 4, 11, 11',
        'or 5, 19, 19',
        'or 6, 8, 8',
        'or 7, 8, 8',
        'setvl 0, 0, 5, 0, 1, 1',
        'setvl 0, 0, 5, 0, 1, 1',
        'sv.or *8, *3, *3',
        'or 3, 17, 17',
        'or 4, 16, 16',
        'or 5, 15, 15',
        'or 6, 18, 18',
        'or 7, 14, 14',
        'setvl 0, 0, 5, 0, 1, 1',
        'setvl 0, 0, 5, 0, 1, 1',
        'addic 0, 0, 0',
        'setvl 0, 0, 5, 0, 1, 1',
        'sv.or *14, *8, *8',
        'setvl 0, 0, 5, 0, 1, 1',
        'sv.or *8, *3, *3',
        'setvl 0, 0, 5, 0, 1, 1',
        'sv.adde *3, *14, *8',
        'setvl 0, 0, 5, 0, 1, 1',
        'setvl 0, 0, 5, 0, 1, 1',
        'setvl 0, 0, 5, 0, 1, 1',
        'or 20, 3, 3',
        'or 19, 4, 4',
        'or 18, 5, 5',
        'or 17, 6, 6',
        'or 16, 7, 7',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.or *8, *24, *24',
        'or 7, 23, 23',
        'or 6, 22, 22',
        'setvl 0, 0, 3, 0, 1, 1',
        'sv.maddedu *3, *8, 7, 6',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 15, 6, 6',
        'setvl 0, 0, 3, 0, 1, 1',
        'setvl 0, 0, 3, 0, 1, 1',
        'or 14, 3, 3',
        'or 12, 4, 4',
        'or 11, 5, 5',
        'setvl 0, 0, 4, 0, 1, 1',
        'or 3, 19, 19',
        'or 4, 18, 18',
        'or 5, 17, 17',
        'or 6, 16, 16',
        'setvl 0, 0, 4, 0, 1, 1',
        'setvl 0, 0, 4, 0, 1, 1',
        'sv.or *7, *3, *3',
        'or 3, 14, 14',
        'or 4, 12, 12',
        'or 5, 11, 11',
        'or 6, 15, 15',
        'setvl 0, 0, 4, 0, 1, 1',
        'setvl 0, 0, 4, 0, 1, 1',
        'addic 0, 0, 0',
        'setvl 0, 0, 4, 0, 1, 1',
        'sv.or *14, *7, *7',
        'setvl 0, 0, 4, 0, 1, 1',
        'sv.or *7, *3, *3',
        'setvl 0, 0, 4, 0, 1, 1',
        'sv.adde *3, *14, *7',
        'setvl 0, 0, 4, 0, 1, 1',
        'setvl 0, 0, 4, 0, 1, 1',
        'setvl 0, 0, 4, 0, 1, 1',
        'or 12, 3, 3',
        'or 11, 4, 4',
        'or 10, 5, 5',
        'or 9, 6, 6',
        'setvl 0, 0, 6, 0, 1, 1',
        'or 3, 21, 21',
        'or 4, 20, 20',
        'or 5, 12, 12',
        'or 6, 11, 11',
        'or 7, 10, 10',
        'or 8, 9, 9',
        'setvl 0, 0, 6, 0, 1, 1',
        'setvl 0, 0, 6, 0, 1, 1',
        'setvl 0, 0, 6, 0, 1, 1',
        'setvl 0, 0, 6, 0, 1, 1',
        'sv.or/mrr *4, *3, *3',
        'or 3, 27, 27',
        'setvl 0, 0, 6, 0, 1, 1',
        'sv.std *4, 0(3)',
    ]
    self.assertEqual(asm_state.output, expected_asm)
1012
def toom_2_mul_256x256(self, lhs_signed, rhs_signed):
    # type: (bool, bool) -> Mul
    """Build a 4-word x 4-word `Mul` whose multiply step is a `ToomCookMul`.

    `lhs_signed` / `rhs_signed` are forwarded unchanged to `ToomCookMul`;
    the only Toom-Cook instance supplied is the one returned by
    `ToomCookInstance.make_toom_2()`.
    """
    instances = (ToomCookInstance.make_toom_2(),)

    def mul_fn(fn, lhs, rhs):
        # type: (Fn, SSAVal, SSAVal) -> tuple[SSAVal, ToomCookMul]
        # hand back both the product value and the ToomCookMul object
        toom_cook_mul = ToomCookMul(
            fn=fn, lhs=lhs, lhs_signed=lhs_signed,
            rhs=rhs, rhs_signed=rhs_signed, instances=instances)
        return toom_cook_mul.retval, toom_cook_mul

    return Mul(mul=mul_fn, lhs_size_in_words=4, rhs_size_in_words=4)
1024
def make_256x256_mul_test_cases(self, lhs_signed, rhs_signed):
    # type: (bool, bool) -> Iterator[tuple[int, int, int]]
    """Yield `(lhs, rhs, product)` triples for a 256x256->512-bit multiply.

    Each operand ranges over `1 << n` for every `n` in
    `range(0, 256, 16)` and `range(15, 256, 16)`, plus zero and one
    hand-picked value; when the corresponding `*_signed` flag is set the
    negation of every one of those values is included too.

    The yielded operands are reduced modulo `1 << 256` and the product
    modulo `1 << 512`.  The product is computed after reinterpreting an
    operand with bit 255 set as negative when that operand is signed.
    """
    operand_modulus = 1 << 256
    product_modulus = 1 << 512

    # The hand-picked operands:
    # 0xc162321a5eaad80b_4b86bb0efdfb93c0_a789ff04cc11b157_eaa08e29fb197621
    # *
    # 0x3138710167583371_998af336a8fac64d_e6da3737090787fe_85ba09ea701f4af2
    # ==
    # int("0x"
    #     "252e6e6f69746163_696c7069746c754d_"
    #     "2061627573746172_614b202d20322d4d_"
    #     "4f4f5420676e6973_75206c756d20746e_"
    #     "6967696220746962_2d36353278363532", base=0)
    # == int.from_bytes(b'256x256-bit bigint mul using TOOM-2 '
    #                   b'- Karatsuba Multiplication.%', 'little')
    lhs_special = (0xc162321a5eaad80b_4b86bb0efdfb93c0 << 128) \
        | 0xa789ff04cc11b157_eaa08e29fb197621
    rhs_special = (0x3138710167583371_998af336a8fac64d << 128) \
        | 0xe6da3737090787fe_85ba09ea701f4af2
    special_product = int.from_bytes(
        b'256x256-bit bigint mul using TOOM-2 '
        b'- Karatsuba Multiplication.%', 'little')
    # sanity-check the constants themselves before generating any cases
    self.assertEqual(lhs_special * rhs_special, special_product)

    powers_of_two = [
        1 << shift
        for shift in (*range(0, 256, 16), *range(15, 256, 16))
    ]

    def sort_key(value):
        # type: (int) -> tuple[bool, int]
        # the hand-picked values go last; everything else is ordered by
        # its 256-bit two's-complement bit pattern
        is_special = abs(value) in (lhs_special, rhs_special)
        return is_special, value % operand_modulus

    def candidates(special, signed):
        # type: (int, bool) -> list[int]
        values = [*powers_of_two, 0, special]
        if signed:
            values += [-value for value in values]
        return sorted(values, key=sort_key)

    def interpret(value, signed):
        # type: (int, bool) -> int
        # wrap to 256 bits, then treat a set top bit as a negative number
        # only when the operand is signed
        wrapped = value % operand_modulus
        if signed and wrapped >> 255 != 0:
            wrapped -= operand_modulus
        return wrapped

    lhs_candidates = candidates(lhs_special, lhs_signed)
    rhs_candidates = candidates(rhs_special, rhs_signed)
    for lhs_candidate in lhs_candidates:
        lhs_number = interpret(lhs_candidate, lhs_signed)
        for rhs_candidate in rhs_candidates:
            rhs_number = interpret(rhs_candidate, rhs_signed)
            yield (lhs_number % operand_modulus,
                   rhs_number % operand_modulus,
                   (lhs_number * rhs_number) % product_modulus)
1074
def tst_toom_2_mul_256x256_sim(
    self, lhs_signed,  # type: bool
    rhs_signed,  # type: bool
    get_state_factory,  # type: Callable[[Mul], _StateFactory]
):
    """Simulate the TOOM-2 256x256 multiply on every generated test case.

    For each case a fresh state is obtained from the factory returned by
    `get_state_factory(code)`, both operands are stored word by word at
    their offsets from the base pointer, the function is simulated, and
    the 8 result words loaded back from the destination are compared
    against the expected product.
    """
    code = self.toom_2_mul_256x256(
        lhs_signed=lhs_signed, rhs_signed=rhs_signed)
    # debugging aids: dump the extra value returned by the mul callback
    # and the generated ops
    print(code.retval[1])
    print(code.fn.ops_to_str())
    make_state = get_state_factory(code)
    base_addr = 0x100
    dest_addr = base_addr + code.dest_offset
    lhs_addr = base_addr + code.lhs_offset
    rhs_addr = base_addr + code.rhs_offset
    cases = self.make_256x256_mul_test_cases(
        lhs_signed=lhs_signed, rhs_signed=rhs_signed)
    for lhs_value, rhs_value, prod_value in cases:
        labels = dict(lhs_signed=lhs_signed, rhs_signed=rhs_signed,
                      lhs_value=hex(lhs_value),
                      rhs_value=hex(rhs_value),
                      prod_value=hex(prod_value))
        with self.subTest(**labels):
            with make_state() as state:
                # the state takes a tuple of register values per SSA value
                state[code.ptr_in] = (base_addr,)
                # lhs words are written first, then rhs words
                operands = ((lhs_addr, lhs_value), (rhs_addr, rhs_value))
                for operand_addr, operand in operands:
                    for word_index in range(4):
                        word = operand >> (GPR_SIZE_IN_BITS * word_index)
                        word &= GPR_VALUE_MASK
                        state.store(
                            operand_addr + word_index * GPR_SIZE_IN_BYTES,
                            word)
                code.fn.sim(state)
                # reassemble the 8-word product, least-significant first
                prod = sum(
                    state.load(dest_addr + GPR_SIZE_IN_BYTES * word_index)
                    << (GPR_SIZE_IN_BITS * word_index)
                    for word_index in range(8))
                self.assertEqual(hex(prod), hex(prod_value),
                                 f"failed: state={state}")
1113
def test_toom_2_mul_256x256_pre_ra_sim(self):
    # Run the simulation check for all four signedness combinations using
    # the state factory from `get_pre_ra_state_factory`; lhs varies
    # slowest: (F, F), (F, T), (T, F), (T, T).
    signedness_pairs = [
        (lhs, rhs) for lhs in (False, True) for rhs in (False, True)
    ]
    for lhs_signed, rhs_signed in signedness_pairs:
        self.tst_toom_2_mul_256x256_sim(
            lhs_signed=lhs_signed,
            rhs_signed=rhs_signed,
            get_state_factory=get_pre_ra_state_factory,
        )
1120
def test_toom_2_mul_256x256_uu_post_ra_sim(self):
    # unsigned lhs * unsigned rhs, simulated with the state factory from
    # `get_post_ra_state_factory`
    self.tst_toom_2_mul_256x256_sim(
        False, False, get_post_ra_state_factory)
1125
def test_toom_2_mul_256x256_su_post_ra_sim(self):
    # signed lhs * unsigned rhs, simulated with the state factory from
    # `get_post_ra_state_factory`
    self.tst_toom_2_mul_256x256_sim(
        True, False, get_post_ra_state_factory)
1130
def test_toom_2_mul_256x256_us_post_ra_sim(self):
    # unsigned lhs * signed rhs, simulated with the state factory from
    # `get_post_ra_state_factory`
    self.tst_toom_2_mul_256x256_sim(
        False, True, get_post_ra_state_factory)
1135
def test_toom_2_mul_256x256_ss_post_ra_sim(self):
    # signed lhs * signed rhs, simulated with the state factory from
    # `get_post_ra_state_factory`
    self.tst_toom_2_mul_256x256_sim(
        True, True, get_post_ra_state_factory)
1140
1141 def test_toom_2_mul_256x256_asm(self):
1142 code = self.toom_2_mul_256x256(lhs_signed=False, rhs_signed=False)
1143 fn = code.fn
1144 assigned_registers = allocate_registers(fn)
1145 gen_asm_state = GenAsmState(assigned_registers)
1146 fn.gen_asm(gen_asm_state)
1147 self.assertEqual(gen_asm_state.output, [
1148 'or 42, 3, 3',
1149 'setvl 0, 0, 4, 0, 1, 1',
1150 'or 7, 42, 42',
1151 'setvl 0, 0, 4, 0, 1, 1',
1152 'sv.ld *3, 64(7)',
1153 'setvl 0, 0, 4, 0, 1, 1',
1154 'sv.or *8, *3, *3',
1155 'setvl 0, 0, 4, 0, 1, 1',
1156 'or 7, 42, 42',
1157 'setvl 0, 0, 4, 0, 1, 1',
1158 'sv.ld *3, 96(7)',
1159 'setvl 0, 0, 4, 0, 1, 1',
1160 'sv.or *14, *3, *3',
1161 'setvl 0, 0, 4, 0, 1, 1',
1162 'setvl 0, 0, 4, 0, 1, 1',
1163 'sv.or *3, *8, *8',
1164 'setvl 0, 0, 4, 0, 1, 1',
1165 'sv.or *9, *3, *3',
1166 'or 3, 9, 9',
1167 'or 5, 10, 10',
1168 'or 8, 11, 11',
1169 'or 7, 12, 12',
1170 'setvl 0, 0, 2, 0, 1, 1',
1171 'or 4, 5, 5',
1172 'setvl 0, 0, 2, 0, 1, 1',
1173 'setvl 0, 0, 2, 0, 1, 1',
1174 'sv.or *5, *3, *3',
1175 'setvl 0, 0, 2, 0, 1, 1',
1176 'or 3, 8, 8',
1177 'or 4, 7, 7',
1178 'setvl 0, 0, 2, 0, 1, 1',
1179 'setvl 0, 0, 2, 0, 1, 1',
1180 'sv.or *8, *3, *3',
1181 'setvl 0, 0, 2, 0, 1, 1',
1182 'setvl 0, 0, 2, 0, 1, 1',
1183 'sv.or *3, *5, *5',
1184 'setvl 0, 0, 2, 0, 1, 1',
1185 'sv.or *5, *3, *3',
1186 'or 4, 5, 5',
1187 'or 7, 6, 6',
1188 'addi 3, 0, 0',
1189 'or 6, 3, 3',
1190 'setvl 0, 0, 3, 0, 1, 1',
1191 'or 3, 4, 4',
1192 'or 4, 7, 7',
1193 'or 5, 6, 6',
1194 'setvl 0, 0, 3, 0, 1, 1',
1195 'setvl 0, 0, 3, 0, 1, 1',
1196 'sv.or *24, *3, *3',
1197 'setvl 0, 0, 2, 0, 1, 1',
1198 'setvl 0, 0, 2, 0, 1, 1',
1199 'sv.or *3, *8, *8',
1200 'setvl 0, 0, 2, 0, 1, 1',
1201 'sv.or *5, *3, *3',
1202 'or 4, 5, 5',
1203 'or 7, 6, 6',
1204 'addi 3, 0, 0',
1205 'or 6, 3, 3',
1206 'setvl 0, 0, 3, 0, 1, 1',
1207 'or 3, 4, 4',
1208 'or 4, 7, 7',
1209 'or 5, 6, 6',
1210 'setvl 0, 0, 3, 0, 1, 1',
1211 'setvl 0, 0, 3, 0, 1, 1',
1212 'sv.or *30, *3, *3',
1213 'setvl 0, 0, 3, 0, 1, 1',
1214 'addic 0, 0, 0',
1215 'setvl 0, 0, 3, 0, 1, 1',
1216 'sv.or *9, *24, *24',
1217 'setvl 0, 0, 3, 0, 1, 1',
1218 'sv.or *6, *30, *30',
1219 'setvl 0, 0, 3, 0, 1, 1',
1220 'sv.adde *3, *9, *6',
1221 'setvl 0, 0, 3, 0, 1, 1',
1222 'sv.or *39, *3, *3',
1223 'setvl 0, 0, 4, 0, 1, 1',
1224 'setvl 0, 0, 4, 0, 1, 1',
1225 'sv.or *3, *14, *14',
1226 'setvl 0, 0, 4, 0, 1, 1',
1227 'sv.or *9, *3, *3',
1228 'or 3, 9, 9',
1229 'or 5, 10, 10',
1230 'or 8, 11, 11',
1231 'or 7, 12, 12',
1232 'setvl 0, 0, 2, 0, 1, 1',
1233 'or 4, 5, 5',
1234 'setvl 0, 0, 2, 0, 1, 1',
1235 'setvl 0, 0, 2, 0, 1, 1',
1236 'sv.or *5, *3, *3',
1237 'setvl 0, 0, 2, 0, 1, 1',
1238 'or 3, 8, 8',
1239 'or 4, 7, 7',
1240 'setvl 0, 0, 2, 0, 1, 1',
1241 'setvl 0, 0, 2, 0, 1, 1',
1242 'sv.or *8, *3, *3',
1243 'setvl 0, 0, 2, 0, 1, 1',
1244 'setvl 0, 0, 2, 0, 1, 1',
1245 'sv.or *3, *5, *5',
1246 'setvl 0, 0, 2, 0, 1, 1',
1247 'sv.or *5, *3, *3',
1248 'or 4, 5, 5',
1249 'or 7, 6, 6',
1250 'addi 3, 0, 0',
1251 'or 6, 3, 3',
1252 'setvl 0, 0, 3, 0, 1, 1',
1253 'or 3, 4, 4',
1254 'or 4, 7, 7',
1255 'or 5, 6, 6',
1256 'setvl 0, 0, 3, 0, 1, 1',
1257 'setvl 0, 0, 3, 0, 1, 1',
1258 'sv.or *14, *3, *3',
1259 'setvl 0, 0, 2, 0, 1, 1',
1260 'setvl 0, 0, 2, 0, 1, 1',
1261 'sv.or *3, *8, *8',
1262 'setvl 0, 0, 2, 0, 1, 1',
1263 'sv.or *5, *3, *3',
1264 'or 4, 5, 5',
1265 'or 7, 6, 6',
1266 'addi 3, 0, 0',
1267 'or 6, 3, 3',
1268 'setvl 0, 0, 3, 0, 1, 1',
1269 'or 3, 4, 4',
1270 'or 4, 7, 7',
1271 'or 5, 6, 6',
1272 'setvl 0, 0, 3, 0, 1, 1',
1273 'setvl 0, 0, 3, 0, 1, 1',
1274 'sv.or *33, *3, *3',
1275 'setvl 0, 0, 3, 0, 1, 1',
1276 'addic 0, 0, 0',
1277 'setvl 0, 0, 3, 0, 1, 1',
1278 'sv.or *9, *14, *14',
1279 'setvl 0, 0, 3, 0, 1, 1',
1280 'sv.or *6, *33, *33',
1281 'setvl 0, 0, 3, 0, 1, 1',
1282 'sv.adde *3, *9, *6',
1283 'setvl 0, 0, 3, 0, 1, 1',
1284 'sv.or *36, *3, *3',
1285 'setvl 0, 0, 3, 0, 1, 1',
1286 'setvl 0, 0, 3, 0, 1, 1',
1287 'sv.or *3, *14, *14',
1288 'setvl 0, 0, 3, 0, 1, 1',
1289 'sv.or/mrr *5, *3, *3',
1290 'or 4, 5, 5',
1291 'or 14, 6, 6',
1292 'or 23, 7, 7',
1293 'addi 3, 0, 0',
1294 'or 22, 3, 3',
1295 'setvl 0, 0, 3, 0, 1, 1',
1296 'addi 3, 0, 0',
1297 'setvl 0, 0, 3, 0, 1, 1',
1298 'sv.or *8, *24, *24',
1299 'or 7, 4, 4',
1300 'or 6, 22, 22',
1301 'setvl 0, 0, 3, 0, 1, 1',
1302 'sv.maddedu *3, *8, 7, 6',
1303 'setvl 0, 0, 3, 0, 1, 1',
1304 'or 19, 6, 6',
1305 'setvl 0, 0, 3, 0, 1, 1',
1306 'setvl 0, 0, 3, 0, 1, 1',
1307 'or 21, 3, 3',
1308 'or 12, 4, 4',
1309 'or 11, 5, 5',
1310 'setvl 0, 0, 3, 0, 1, 1',
1311 'sv.or *8, *24, *24',
1312 'or 7, 14, 14',
1313 'or 6, 22, 22',
1314 'setvl 0, 0, 3, 0, 1, 1',
1315 'sv.maddedu *3, *8, 7, 6',
1316 'setvl 0, 0, 3, 0, 1, 1',
1317 'or 18, 6, 6',
1318 'setvl 0, 0, 3, 0, 1, 1',
1319 'setvl 0, 0, 3, 0, 1, 1',
1320 'or 17, 3, 3',
1321 'or 16, 4, 4',
1322 'or 15, 5, 5',
1323 'addi 3, 0, 0',
1324 'or 8, 3, 3',
1325 'addi 3, 0, 0',
1326 'or 14, 3, 3',
1327 'setvl 0, 0, 5, 0, 1, 1',
1328 'or 3, 12, 12',
1329 'or 4, 11, 11',
1330 'or 5, 19, 19',
1331 'or 6, 8, 8',
1332 'or 7, 8, 8',
1333 'setvl 0, 0, 5, 0, 1, 1',
1334 'setvl 0, 0, 5, 0, 1, 1',
1335 'sv.or *8, *3, *3',
1336 'or 3, 17, 17',
1337 'or 4, 16, 16',
1338 'or 5, 15, 15',
1339 'or 6, 18, 18',
1340 'or 7, 14, 14',
1341 'setvl 0, 0, 5, 0, 1, 1',
1342 'setvl 0, 0, 5, 0, 1, 1',
1343 'addic 0, 0, 0',
1344 'setvl 0, 0, 5, 0, 1, 1',
1345 'sv.or *14, *8, *8',
1346 'setvl 0, 0, 5, 0, 1, 1',
1347 'sv.or *8, *3, *3',
1348 'setvl 0, 0, 5, 0, 1, 1',
1349 'sv.adde *3, *14, *8',
1350 'setvl 0, 0, 5, 0, 1, 1',
1351 'setvl 0, 0, 5, 0, 1, 1',
1352 'setvl 0, 0, 5, 0, 1, 1',
1353 'or 20, 3, 3',
1354 'or 19, 4, 4',
1355 'or 18, 5, 5',
1356 'or 17, 6, 6',
1357 'or 16, 7, 7',
1358 'setvl 0, 0, 3, 0, 1, 1',
1359 'sv.or *8, *24, *24',
1360 'or 7, 23, 23',
1361 'or 6, 22, 22',
1362 'setvl 0, 0, 3, 0, 1, 1',
1363 'sv.maddedu *3, *8, 7, 6',
1364 'setvl 0, 0, 3, 0, 1, 1',
1365 'or 15, 6, 6',
1366 'setvl 0, 0, 3, 0, 1, 1',
1367 'setvl 0, 0, 3, 0, 1, 1',
1368 'or 14, 3, 3',
1369 'or 12, 4, 4',
1370 'or 11, 5, 5',
1371 'setvl 0, 0, 4, 0, 1, 1',
1372 'or 3, 19, 19',
1373 'or 4, 18, 18',
1374 'or 5, 17, 17',
1375 'or 6, 16, 16',
1376 'setvl 0, 0, 4, 0, 1, 1',
1377 'setvl 0, 0, 4, 0, 1, 1',
1378 'sv.or *7, *3, *3',
1379 'or 3, 14, 14',
1380 'or 4, 12, 12',
1381 'or 5, 11, 11',
1382 'or 6, 15, 15',
1383 'setvl 0, 0, 4, 0, 1, 1',
1384 'setvl 0, 0, 4, 0, 1, 1',
1385 'addic 0, 0, 0',
1386 'setvl 0, 0, 4, 0, 1, 1',
1387 'sv.or *14, *7, *7',
1388 'setvl 0, 0, 4, 0, 1, 1',
1389 'sv.or *7, *3, *3',
1390 'setvl 0, 0, 4, 0, 1, 1',
1391 'sv.adde *3, *14, *7',
1392 'setvl 0, 0, 4, 0, 1, 1',
1393 'setvl 0, 0, 4, 0, 1, 1',
1394 'setvl 0, 0, 4, 0, 1, 1',
1395 'or 12, 3, 3',
1396 'or 11, 4, 4',
1397 'or 10, 5, 5',
1398 'or 9, 6, 6',
1399 'setvl 0, 0, 6, 0, 1, 1',
1400 'or 3, 21, 21',
1401 'or 4, 20, 20',
1402 'or 5, 12, 12',
1403 'or 6, 11, 11',
1404 'or 7, 10, 10',
1405 'or 8, 9, 9',
1406 'setvl 0, 0, 6, 0, 1, 1',
1407 'setvl 0, 0, 6, 0, 1, 1',
1408 'sv.or *24, *3, *3',
1409 'setvl 0, 0, 3, 0, 1, 1',
1410 'setvl 0, 0, 3, 0, 1, 1',
1411 'sv.or *3, *36, *36',
1412 'setvl 0, 0, 3, 0, 1, 1',
1413 'sv.or/mrr *5, *3, *3',
1414 'or 4, 5, 5',
1415 'or 14, 6, 6',
1416 'or 23, 7, 7',
1417 'addi 3, 0, 0',
1418 'or 22, 3, 3',
1419 'setvl 0, 0, 3, 0, 1, 1',
1420 'addi 3, 0, 0',
1421 'setvl 0, 0, 3, 0, 1, 1',
1422 'sv.or *8, *39, *39',
1423 'or 7, 4, 4',
1424 'or 6, 22, 22',
1425 'setvl 0, 0, 3, 0, 1, 1',
1426 'sv.maddedu *3, *8, 7, 6',
1427 'setvl 0, 0, 3, 0, 1, 1',
1428 'or 19, 6, 6',
1429 'setvl 0, 0, 3, 0, 1, 1',
1430 'setvl 0, 0, 3, 0, 1, 1',
1431 'or 21, 3, 3',
1432 'or 12, 4, 4',
1433 'or 11, 5, 5',
1434 'setvl 0, 0, 3, 0, 1, 1',
1435 'sv.or *8, *39, *39',
1436 'or 7, 14, 14',
1437 'or 6, 22, 22',
1438 'setvl 0, 0, 3, 0, 1, 1',
1439 'sv.maddedu *3, *8, 7, 6',
1440 'setvl 0, 0, 3, 0, 1, 1',
1441 'or 18, 6, 6',
1442 'setvl 0, 0, 3, 0, 1, 1',
1443 'setvl 0, 0, 3, 0, 1, 1',
1444 'or 17, 3, 3',
1445 'or 16, 4, 4',
1446 'or 15, 5, 5',
1447 'addi 3, 0, 0',
1448 'or 8, 3, 3',
1449 'addi 3, 0, 0',
1450 'or 14, 3, 3',
1451 'setvl 0, 0, 5, 0, 1, 1',
1452 'or 3, 12, 12',
1453 'or 4, 11, 11',
1454 'or 5, 19, 19',
1455 'or 6, 8, 8',
1456 'or 7, 8, 8',
1457 'setvl 0, 0, 5, 0, 1, 1',
1458 'setvl 0, 0, 5, 0, 1, 1',
1459 'sv.or *8, *3, *3',
1460 'or 3, 17, 17',
1461 'or 4, 16, 16',
1462 'or 5, 15, 15',
1463 'or 6, 18, 18',
1464 'or 7, 14, 14',
1465 'setvl 0, 0, 5, 0, 1, 1',
1466 'setvl 0, 0, 5, 0, 1, 1',
1467 'addic 0, 0, 0',
1468 'setvl 0, 0, 5, 0, 1, 1',
1469 'sv.or *14, *8, *8',
1470 'setvl 0, 0, 5, 0, 1, 1',
1471 'sv.or *8, *3, *3',
1472 'setvl 0, 0, 5, 0, 1, 1',
1473 'sv.adde *3, *14, *8',
1474 'setvl 0, 0, 5, 0, 1, 1',
1475 'setvl 0, 0, 5, 0, 1, 1',
1476 'setvl 0, 0, 5, 0, 1, 1',
1477 'or 20, 3, 3',
1478 'or 19, 4, 4',
1479 'or 18, 5, 5',
1480 'or 17, 6, 6',
1481 'or 16, 7, 7',
1482 'setvl 0, 0, 3, 0, 1, 1',
1483 'sv.or *8, *39, *39',
1484 'or 7, 23, 23',
1485 'or 6, 22, 22',
1486 'setvl 0, 0, 3, 0, 1, 1',
1487 'sv.maddedu *3, *8, 7, 6',
1488 'setvl 0, 0, 3, 0, 1, 1',
1489 'or 15, 6, 6',
1490 'setvl 0, 0, 3, 0, 1, 1',
1491 'setvl 0, 0, 3, 0, 1, 1',
1492 'or 14, 3, 3',
1493 'or 12, 4, 4',
1494 'or 11, 5, 5',
1495 'setvl 0, 0, 4, 0, 1, 1',
1496 'or 3, 19, 19',
1497 'or 4, 18, 18',
1498 'or 5, 17, 17',
1499 'or 6, 16, 16',
1500 'setvl 0, 0, 4, 0, 1, 1',
1501 'setvl 0, 0, 4, 0, 1, 1',
1502 'sv.or *7, *3, *3',
1503 'or 3, 14, 14',
1504 'or 4, 12, 12',
1505 'or 5, 11, 11',
1506 'or 6, 15, 15',
1507 'setvl 0, 0, 4, 0, 1, 1',
1508 'setvl 0, 0, 4, 0, 1, 1',
1509 'addic 0, 0, 0',
1510 'setvl 0, 0, 4, 0, 1, 1',
1511 'sv.or *14, *7, *7',
1512 'setvl 0, 0, 4, 0, 1, 1',
1513 'sv.or *7, *3, *3',
1514 'setvl 0, 0, 4, 0, 1, 1',
1515 'sv.adde *3, *14, *7',
1516 'setvl 0, 0, 4, 0, 1, 1',
1517 'setvl 0, 0, 4, 0, 1, 1',
1518 'setvl 0, 0, 4, 0, 1, 1',
1519 'or 12, 3, 3',
1520 'or 11, 4, 4',
1521 'or 10, 5, 5',
1522 'or 9, 6, 6',
1523 'setvl 0, 0, 6, 0, 1, 1',
1524 'or 3, 21, 21',
1525 'or 4, 20, 20',
1526 'or 5, 12, 12',
1527 'or 6, 11, 11',
1528 'or 7, 10, 10',
1529 'or 8, 9, 9',
1530 'setvl 0, 0, 6, 0, 1, 1',
1531 'setvl 0, 0, 6, 0, 1, 1',
1532 'sv.or *36, *3, *3',
1533 'setvl 0, 0, 3, 0, 1, 1',
1534 'setvl 0, 0, 3, 0, 1, 1',
1535 'sv.or *3, *33, *33',
1536 'setvl 0, 0, 3, 0, 1, 1',
1537 'sv.or/mrr *5, *3, *3',
1538 'or 4, 5, 5',
1539 'or 14, 6, 6',
1540 'or 23, 7, 7',
1541 'addi 3, 0, 0',
1542 'or 22, 3, 3',
1543 'setvl 0, 0, 3, 0, 1, 1',
1544 'addi 3, 0, 0',
1545 'setvl 0, 0, 3, 0, 1, 1',
1546 'sv.or *8, *30, *30',
1547 'or 7, 4, 4',
1548 'or 6, 22, 22',
1549 'setvl 0, 0, 3, 0, 1, 1',
1550 'sv.maddedu *3, *8, 7, 6',
1551 'setvl 0, 0, 3, 0, 1, 1',
1552 'or 19, 6, 6',
1553 'setvl 0, 0, 3, 0, 1, 1',
1554 'setvl 0, 0, 3, 0, 1, 1',
1555 'or 21, 3, 3',
1556 'or 12, 4, 4',
1557 'or 11, 5, 5',
1558 'setvl 0, 0, 3, 0, 1, 1',
1559 'sv.or *8, *30, *30',
1560 'or 7, 14, 14',
1561 'or 6, 22, 22',
1562 'setvl 0, 0, 3, 0, 1, 1',
1563 'sv.maddedu *3, *8, 7, 6',
1564 'setvl 0, 0, 3, 0, 1, 1',
1565 'or 18, 6, 6',
1566 'setvl 0, 0, 3, 0, 1, 1',
1567 'setvl 0, 0, 3, 0, 1, 1',
1568 'or 17, 3, 3',
1569 'or 16, 4, 4',
1570 'or 15, 5, 5',
1571 'addi 3, 0, 0',
1572 'or 8, 3, 3',
1573 'addi 3, 0, 0',
1574 'or 14, 3, 3',
1575 'setvl 0, 0, 5, 0, 1, 1',
1576 'or 3, 12, 12',
1577 'or 4, 11, 11',
1578 'or 5, 19, 19',
1579 'or 6, 8, 8',
1580 'or 7, 8, 8',
1581 'setvl 0, 0, 5, 0, 1, 1',
1582 'setvl 0, 0, 5, 0, 1, 1',
1583 'sv.or *8, *3, *3',
1584 'or 3, 17, 17',
1585 'or 4, 16, 16',
1586 'or 5, 15, 15',
1587 'or 6, 18, 18',
1588 'or 7, 14, 14',
1589 'setvl 0, 0, 5, 0, 1, 1',
1590 'setvl 0, 0, 5, 0, 1, 1',
1591 'addic 0, 0, 0',
1592 'setvl 0, 0, 5, 0, 1, 1',
1593 'sv.or *14, *8, *8',
1594 'setvl 0, 0, 5, 0, 1, 1',
1595 'sv.or *8, *3, *3',
1596 'setvl 0, 0, 5, 0, 1, 1',
1597 'sv.adde *3, *14, *8',
1598 'setvl 0, 0, 5, 0, 1, 1',
1599 'setvl 0, 0, 5, 0, 1, 1',
1600 'setvl 0, 0, 5, 0, 1, 1',
1601 'or 20, 3, 3',
1602 'or 19, 4, 4',
1603 'or 18, 5, 5',
1604 'or 17, 6, 6',
1605 'or 16, 7, 7',
1606 'setvl 0, 0, 3, 0, 1, 1',
1607 'sv.or *8, *30, *30',
1608 'or 7, 23, 23',
1609 'or 6, 22, 22',
1610 'setvl 0, 0, 3, 0, 1, 1',
1611 'sv.maddedu *3, *8, 7, 6',
1612 'setvl 0, 0, 3, 0, 1, 1',
1613 'or 15, 6, 6',
1614 'setvl 0, 0, 3, 0, 1, 1',
1615 'setvl 0, 0, 3, 0, 1, 1',
1616 'or 14, 3, 3',
1617 'or 12, 4, 4',
1618 'or 11, 5, 5',
1619 'setvl 0, 0, 4, 0, 1, 1',
1620 'or 3, 19, 19',
1621 'or 4, 18, 18',
1622 'or 5, 17, 17',
1623 'or 6, 16, 16',
1624 'setvl 0, 0, 4, 0, 1, 1',
1625 'setvl 0, 0, 4, 0, 1, 1',
1626 'sv.or *7, *3, *3',
1627 'or 3, 14, 14',
1628 'or 4, 12, 12',
1629 'or 5, 11, 11',
1630 'or 6, 15, 15',
1631 'setvl 0, 0, 4, 0, 1, 1',
1632 'setvl 0, 0, 4, 0, 1, 1',
1633 'addic 0, 0, 0',
1634 'setvl 0, 0, 4, 0, 1, 1',
1635 'sv.or *14, *7, *7',
1636 'setvl 0, 0, 4, 0, 1, 1',
1637 'sv.or *7, *3, *3',
1638 'setvl 0, 0, 4, 0, 1, 1',
1639 'sv.adde *3, *14, *7',
1640 'setvl 0, 0, 4, 0, 1, 1',
1641 'setvl 0, 0, 4, 0, 1, 1',
1642 'setvl 0, 0, 4, 0, 1, 1',
1643 'or 12, 3, 3',
1644 'or 11, 4, 4',
1645 'or 10, 5, 5',
1646 'or 9, 6, 6',
1647 'setvl 0, 0, 6, 0, 1, 1',
1648 'or 3, 21, 21',
1649 'or 4, 20, 20',
1650 'or 5, 12, 12',
1651 'or 6, 11, 11',
1652 'or 7, 10, 10',
1653 'or 8, 9, 9',
1654 'setvl 0, 0, 6, 0, 1, 1',
1655 'setvl 0, 0, 6, 0, 1, 1',
1656 'sv.or *30, *3, *3',
1657 'setvl 0, 0, 6, 0, 1, 1',
1658 'setvl 0, 0, 6, 0, 1, 1',
1659 'sv.or *3, *24, *24',
1660 'setvl 0, 0, 6, 0, 1, 1',
1661 'sv.or *14, *3, *3',
1662 'or 4, 14, 14',
1663 'or 11, 15, 15',
1664 'or 10, 16, 16',
1665 'or 9, 17, 17',
1666 'or 8, 18, 18',
1667 'or 3, 19, 19',
1668 'setvl 0, 0, 5, 0, 1, 1',
1669 'or 3, 4, 4',
1670 'or 4, 11, 11',
1671 'or 5, 10, 10',
1672 'or 6, 9, 9',
1673 'or 7, 8, 8',
1674 'setvl 0, 0, 5, 0, 1, 1',
1675 'setvl 0, 0, 5, 0, 1, 1',
1676 'sv.or *25, *3, *3',
1677 'setvl 0, 0, 6, 0, 1, 1',
1678 'setvl 0, 0, 6, 0, 1, 1',
1679 'sv.or *3, *36, *36',
1680 'setvl 0, 0, 6, 0, 1, 1',
1681 'sv.or *14, *3, *3',
1682 'or 4, 14, 14',
1683 'or 11, 15, 15',
1684 'or 10, 16, 16',
1685 'or 9, 17, 17',
1686 'or 8, 18, 18',
1687 'or 3, 19, 19',
1688 'setvl 0, 0, 5, 0, 1, 1',
1689 'or 3, 4, 4',
1690 'or 4, 11, 11',
1691 'or 5, 10, 10',
1692 'or 6, 9, 9',
1693 'or 7, 8, 8',
1694 'setvl 0, 0, 5, 0, 1, 1',
1695 'setvl 0, 0, 5, 0, 1, 1',
1696 'setvl 0, 0, 5, 0, 1, 1',
1697 'subfc 0, 0, 0',
1698 'setvl 0, 0, 5, 0, 1, 1',
1699 'sv.or *14, *25, *25',
1700 'setvl 0, 0, 5, 0, 1, 1',
1701 'sv.or *8, *3, *3',
1702 'setvl 0, 0, 5, 0, 1, 1',
1703 'sv.subfe *3, *14, *8',
1704 'setvl 0, 0, 5, 0, 1, 1',
1705 'sv.or *20, *3, *3',
1706 'setvl 0, 0, 6, 0, 1, 1',
1707 'setvl 0, 0, 6, 0, 1, 1',
1708 'sv.or *3, *30, *30',
1709 'setvl 0, 0, 6, 0, 1, 1',
1710 'sv.or *14, *3, *3',
1711 'or 4, 14, 14',
1712 'or 11, 15, 15',
1713 'or 10, 16, 16',
1714 'or 9, 17, 17',
1715 'or 8, 18, 18',
1716 'or 3, 19, 19',
1717 'setvl 0, 0, 5, 0, 1, 1',
1718 'or 3, 4, 4',
1719 'or 4, 11, 11',
1720 'or 5, 10, 10',
1721 'or 6, 9, 9',
1722 'or 7, 8, 8',
1723 'setvl 0, 0, 5, 0, 1, 1',
1724 'setvl 0, 0, 5, 0, 1, 1',
1725 'sv.or *30, *3, *3',
1726 'setvl 0, 0, 5, 0, 1, 1',
1727 'subfc 0, 0, 0',
1728 'setvl 0, 0, 5, 0, 1, 1',
1729 'sv.or *14, *30, *30',
1730 'setvl 0, 0, 5, 0, 1, 1',
1731 'sv.or *8, *20, *20',
1732 'setvl 0, 0, 5, 0, 1, 1',
1733 'sv.subfe *3, *14, *8',
1734 'setvl 0, 0, 5, 0, 1, 1',
1735 'sv.or *16, *3, *3',
1736 'setvl 0, 0, 5, 0, 1, 1',
1737 'setvl 0, 0, 5, 0, 1, 1',
1738 'sv.or *3, *25, *25',
1739 'setvl 0, 0, 5, 0, 1, 1',
1740 'or 29, 3, 3',
1741 'or 28, 4, 4',
1742 'or 8, 5, 5',
1743 'or 15, 6, 6',
1744 'or 14, 7, 7',
1745 'setvl 0, 0, 5, 0, 1, 1',
1746 'setvl 0, 0, 5, 0, 1, 1',
1747 'sv.or *3, *16, *16',
1748 'setvl 0, 0, 5, 0, 1, 1',
1749 'or 24, 3, 3',
1750 'or 23, 4, 4',
1751 'or 22, 5, 5',
1752 'or 21, 6, 6',
1753 'or 20, 7, 7',
1754 'setvl 0, 0, 5, 0, 1, 1',
1755 'setvl 0, 0, 5, 0, 1, 1',
1756 'sv.or *3, *30, *30',
1757 'setvl 0, 0, 5, 0, 1, 1',
1758 'or 27, 3, 3',
1759 'or 26, 4, 4',
1760 'or 12, 5, 5',
1761 'or 11, 6, 6',
1762 'or 3, 7, 7',
1763 'addi 3, 0, 0',
1764 'addi 3, 0, 0',
1765 'or 10, 3, 3',
1766 'or 3, 20, 20',
1767 'sradi 3, 3, 63',
1768 'or 9, 3, 3',
1769 'setvl 0, 0, 6, 0, 1, 1',
1770 'or 3, 8, 8',
1771 'or 4, 15, 15',
1772 'or 5, 14, 14',
1773 'or 6, 10, 10',
1774 'or 7, 10, 10',
1775 'or 8, 10, 10',
1776 'setvl 0, 0, 6, 0, 1, 1',
1777 'setvl 0, 0, 6, 0, 1, 1',
1778 'sv.or *14, *3, *3',
1779 'or 3, 24, 24',
1780 'or 4, 23, 23',
1781 'or 5, 22, 22',
1782 'or 6, 21, 21',
1783 'or 7, 20, 20',
1784 'or 8, 9, 9',
1785 'setvl 0, 0, 6, 0, 1, 1',
1786 'setvl 0, 0, 6, 0, 1, 1',
1787 'addic 0, 0, 0',
1788 'setvl 0, 0, 6, 0, 1, 1',
1789 'sv.or *20, *14, *14',
1790 'setvl 0, 0, 6, 0, 1, 1',
1791 'sv.or *14, *3, *3',
1792 'setvl 0, 0, 6, 0, 1, 1',
1793 'sv.adde *3, *20, *14',
1794 'setvl 0, 0, 6, 0, 1, 1',
1795 'setvl 0, 0, 6, 0, 1, 1',
1796 'setvl 0, 0, 6, 0, 1, 1',
1797 'sv.or *20, *3, *3',
1798 'or 19, 20, 20',
1799 'or 18, 21, 21',
1800 'or 3, 22, 22',
1801 'or 9, 23, 23',
1802 'or 8, 24, 24',
1803 'or 7, 25, 25',
1804 'setvl 0, 0, 4, 0, 1, 1',
1805 'or 4, 9, 9',
1806 'or 5, 8, 8',
1807 'or 6, 7, 7',
1808 'setvl 0, 0, 4, 0, 1, 1',
1809 'setvl 0, 0, 4, 0, 1, 1',
1810 'sv.or *7, *3, *3',
1811 'or 3, 27, 27',
1812 'or 4, 26, 26',
1813 'or 5, 12, 12',
1814 'or 6, 11, 11',
1815 'setvl 0, 0, 4, 0, 1, 1',
1816 'setvl 0, 0, 4, 0, 1, 1',
1817 'addic 0, 0, 0',
1818 'setvl 0, 0, 4, 0, 1, 1',
1819 'sv.or *14, *7, *7',
1820 'setvl 0, 0, 4, 0, 1, 1',
1821 'sv.or *7, *3, *3',
1822 'setvl 0, 0, 4, 0, 1, 1',
1823 'sv.adde *3, *14, *7',
1824 'setvl 0, 0, 4, 0, 1, 1',
1825 'setvl 0, 0, 4, 0, 1, 1',
1826 'setvl 0, 0, 4, 0, 1, 1',
1827 'or 15, 3, 3',
1828 'or 14, 4, 4',
1829 'or 12, 5, 5',
1830 'or 11, 6, 6',
1831 'setvl 0, 0, 8, 0, 1, 1',
1832 'or 3, 29, 29',
1833 'or 4, 28, 28',
1834 'or 5, 19, 19',
1835 'or 6, 18, 18',
1836 'or 7, 15, 15',
1837 'or 8, 14, 14',
1838 'or 9, 12, 12',
1839 'or 10, 11, 11',
1840 'setvl 0, 0, 8, 0, 1, 1',
1841 'setvl 0, 0, 8, 0, 1, 1',
1842 'setvl 0, 0, 8, 0, 1, 1',
1843 'setvl 0, 0, 8, 0, 1, 1',
1844 'sv.or/mrr *4, *3, *3',
1845 'or 3, 42, 42',
1846 'setvl 0, 0, 8, 0, 1, 1',
1847 'sv.std *4, 0(3)'
1848 ])
1849
1850
1851 if __name__ == "__main__":  # true only when this file is executed as a script, not when imported
1852 unittest.main()  # hand control to unittest's command-line runner for this module's tests