1 from contextlib
import contextmanager
3 from typing
import Any
, Callable
, ContextManager
, Iterator
, Tuple
5 from bigint_presentation_code
.compiler_ir
import (GPR_SIZE_IN_BITS
,
7 GPR_VALUE_MASK
, BaseSimState
,
8 Fn
, GenAsmState
, OpKind
,
10 PreRASimState
, SSAVal
)
11 from bigint_presentation_code
.register_allocator
import allocate_registers
12 from bigint_presentation_code
.toom_cook
import (ToomCookInstance
, ToomCookMul
,
# Type alias: a zero-argument callable that returns a context manager
# whose managed value is a BaseSimState.
15 _StateFactory
= Callable
[[], ContextManager
[BaseSimState
]]
def simple_umul(fn, lhs, rhs):
    # type: (Fn, SSAVal, SSAVal) -> tuple[SSAVal, None]
    """Multiply `lhs` by `rhs` with both operands flagged as unsigned.

    Thin adapter around `simple_mul`: forwards `fn`, `lhs` and `rhs`,
    passes `lhs_signed=False` / `rhs_signed=False` and the name "mul",
    and pairs whatever `simple_mul` returns with `None` so the result
    is a 2-tuple.
    """
    unsigned_product = simple_mul(
        fn=fn,
        lhs=lhs, lhs_signed=False,
        rhs=rhs, rhs_signed=False,
        name="mul",
    )
    return unsigned_product, None
# Return a _StateFactory for `code` backed by a PreRASimState.
# NOTE(review): several lines of this function (including whatever wraps
# the statements below and what is yielded/returned) are missing from this
# copy of the file; only the visible statements are described.
24 def get_pre_ra_state_factory(code
):
25 # type: (Mul) -> _StateFactory
# Start from a PreRASimState with no SSA values and empty memory.
28 state
= PreRASimState(ssa_vals
={}, memory
={})
# Enter the state's set_as_current_debugging_state() context manager.
29 with state
.set_as_current_debugging_state():
# Type alias for a multiply routine: called with (Fn, SSAVal, SSAVal) and
# returning a 2-tuple whose first item is an SSAVal and whose second item
# is unconstrained (Any).
35 _MulFn
= Callable
[[Fn
, SSAVal
, SSAVal
], Tuple
[SSAVal
, Any
]]
# Record the operand/result sizes and offsets, then append the ops that
# load both operands through `ptr_in` and store the result.
# NOTE(review): some lines of this method are missing from this copy of
# the file -- the definitions of `fn` and `self.dest_offset`, the call that
# consumes the two loaded operands (presumably `mul`, producing
# `self.retval`), and part of the final store op are not visible here.
37 def __init__(self
, mul
, lhs_size_in_words
, rhs_size_in_words
):
38 # type: (_MulFn, int, int) -> None
# The destination holds lhs_size_in_words + rhs_size_in_words words; each
# *_size_in_bytes is the word count times GPR_SIZE_IN_BYTES.
42 self
.dest_size_in_words
= lhs_size_in_words
+ rhs_size_in_words
43 self
.dest_size_in_bytes
= self
.dest_size_in_words
* GPR_SIZE_IN_BYTES
44 self
.lhs_size_in_words
= lhs_size_in_words
45 self
.lhs_size_in_bytes
= self
.lhs_size_in_words
* GPR_SIZE_IN_BYTES
46 self
.rhs_size_in_words
= rhs_size_in_words
47 self
.rhs_size_in_bytes
= self
.rhs_size_in_words
* GPR_SIZE_IN_BYTES
# Offsets: lhs starts right after the destination area, rhs right after lhs.
48 self
.lhs_offset
= self
.dest_size_in_bytes
+ self
.dest_offset
49 self
.rhs_offset
= self
.lhs_size_in_bytes
+ self
.lhs_offset
# ptr_in is output 0 of a FuncArgR3 op.
50 self
.ptr_in
= fn
.append_new_op(kind
=OpKind
.FuncArgR3
,
51 name
="ptr_in").outputs
[0]
# Load lhs: SetVLI with lhs_size_in_words, then SvLd taking ptr_in and the
# SetVLI output, with lhs_offset as the immediate.
52 self
.lhs_setvl
= fn
.append_new_op(
53 kind
=OpKind
.SetVLI
, immediates
=[lhs_size_in_words
],
54 maxvl
=lhs_size_in_words
, name
="lhs_setvl")
55 self
.load_lhs
= fn
.append_new_op(
56 kind
=OpKind
.SvLd
, immediates
=[self
.lhs_offset
],
57 input_vals
=[self
.ptr_in
, self
.lhs_setvl
.outputs
[0]],
58 name
="load_lhs", maxvl
=lhs_size_in_words
)
# Load rhs the same way, using rhs_size_in_words and rhs_offset.
59 self
.rhs_setvl
= fn
.append_new_op(
60 kind
=OpKind
.SetVLI
, immediates
=[rhs_size_in_words
],
61 maxvl
=rhs_size_in_words
, name
="rhs_setvl")
62 self
.load_rhs
= fn
.append_new_op(
63 kind
=OpKind
.SvLd
, immediates
=[self
.rhs_offset
],
64 input_vals
=[self
.ptr_in
, self
.rhs_setvl
.outputs
[0]],
65 name
="load_rhs", maxvl
=rhs_size_in_words
)
# Arguments of a call whose opening line is not visible: fn plus output 0
# of each load op.
67 fn
, self
.load_lhs
.outputs
[0], self
.load_rhs
.outputs
[0])
# Store: SetVLI with dest_size_in_words, then an op taking retval[0],
# ptr_in and the SetVLI output, with dest_offset as the immediate.
68 self
.dest_setvl
= fn
.append_new_op(
69 kind
=OpKind
.SetVLI
, immediates
=[self
.dest_size_in_words
],
70 maxvl
=self
.dest_size_in_words
, name
="dest_setvl")
71 self
.store
= fn
.append_new_op(
73 input_vals
=[self
.retval
[0], self
.ptr_in
,
74 self
.dest_setvl
.outputs
[0]],
75 immediates
=[self
.dest_offset
], maxvl
=self
.dest_size_in_words
,
# Return a _StateFactory for `code` that uses the result of register
# allocation.
# NOTE(review): several lines of this function are missing from this copy
# of the file, including the constructor call that receives the keyword
# arguments below; only the visible statements are described.
79 def get_post_ra_state_factory(code
):
80 # type: (Mul) -> _StateFactory
# Run register allocation on code.fn and keep its return value.
81 ssa_val_to_loc_map
= allocate_registers(code
.fn
)
# Keyword arguments of a call whose opening line is not visible: the
# allocation result plus empty `memory` and `loc_values` dicts.
86 ssa_val_to_loc_map
=ssa_val_to_loc_map
,
87 memory
={}, loc_values
={})
91 class TestToomCook(unittest
.TestCase
):
# Builds ToomCookInstance.make_toom_2(); the adjacent string fragments are
# presumably the expected repr() of that instance.
# NOTE(review): the assertion call and several fragments of the expected
# literal are missing from this copy of the file -- confirm against the
# full source.
94 def test_toom_2_repr(self
):
95 TOOM_2
= ToomCookInstance
.make_toom_2()
96 # print(repr(repr(TOOM_2)))
99 "ToomCookInstance(lhs_part_count=2, rhs_part_count=2, "
100 "eval_points=(0, 1, POINT_AT_INFINITY), "
102 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
104 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
106 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
107 "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
108 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)}))),"
110 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
112 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
114 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
115 "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
116 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)}))),"
118 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
121 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
123 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
124 "poly=EvalOpPoly({0: Fraction(-1, 1), 1: Fraction(1, 1)})), "
126 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
128 "0: Fraction(-1, 1), 1: Fraction(1, 1), 2: Fraction(-1, 1)})), "
129 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)}))))"
# Builds ToomCookInstance.make_toom_2_5(); the adjacent string fragments are
# presumably the expected repr() of that instance (lhs_part_count=3,
# rhs_part_count=2 per the first fragment).
# NOTE(review): the assertion call and several fragments of the expected
# literal are missing from this copy of the file -- confirm against the
# full source.
132 def test_toom_2_5_repr(self
):
133 TOOM_2_5
= ToomCookInstance
.make_toom_2_5()
134 # print(repr(repr(TOOM_2_5)))
137 "ToomCookInstance(lhs_part_count=3, rhs_part_count=2, "
138 "eval_points=(0, 1, -1, POINT_AT_INFINITY), lhs_eval_ops=("
139 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
142 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
143 "rhs=EvalOpInput(lhs=2, rhs=0, "
144 "poly=EvalOpPoly({2: Fraction(1, 1)})), "
145 "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), "
146 "rhs=EvalOpInput(lhs=1, rhs=0, "
147 "poly=EvalOpPoly({1: Fraction(1, 1)})), "
149 "0: Fraction(1, 1), 1: Fraction(1, 1), 2: Fraction(1, 1)})), "
152 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
153 "rhs=EvalOpInput(lhs=2, rhs=0, "
154 "poly=EvalOpPoly({2: Fraction(1, 1)})), "
155 "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), "
156 "rhs=EvalOpInput(lhs=1, rhs=0, "
157 "poly=EvalOpPoly({1: Fraction(1, 1)})), poly=EvalOpPoly("
158 "{0: Fraction(1, 1), 1: Fraction(-1, 1), 2: Fraction(1, 1)})), "
159 "EvalOpInput(lhs=2, rhs=0, "
160 "poly=EvalOpPoly({2: Fraction(1, 1)}))), rhs_eval_ops=("
161 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
162 "EvalOpAdd(lhs=EvalOpInput(lhs=0, rhs=0, "
163 "poly=EvalOpPoly({0: Fraction(1, 1)})), rhs="
164 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
165 "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
167 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
168 "rhs=EvalOpInput(lhs=1, rhs=0, "
169 "poly=EvalOpPoly({1: Fraction(1, 1)})), "
170 "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(-1, 1)})), "
171 "EvalOpInput(lhs=1, rhs=0, "
172 "poly=EvalOpPoly({1: Fraction(1, 1)}))), "
174 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
175 "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpSub(lhs="
176 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
177 "rhs=EvalOpInput(lhs=2, rhs=0, "
178 "poly=EvalOpPoly({2: Fraction(1, 1)})), "
179 "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(-1, 1)})), "
181 "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(-1, 2)})), rhs="
182 "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)})), "
184 "{1: Fraction(1, 2), 2: Fraction(-1, 2), 3: Fraction(-1, 1)})), "
185 "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpAdd(lhs="
186 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
188 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
189 "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(1, 1)})), rhs=2, "
190 "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(1, 2)})), rhs="
191 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
193 "{0: Fraction(-1, 1), 1: Fraction(1, 2), 2: Fraction(1, 2)})), "
194 "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)}))))"
# Builds ToomCookInstance.make_toom_2_5().reversed(); the adjacent string
# fragments are presumably the expected repr() of the reversed instance
# (lhs_part_count=2, rhs_part_count=3 per the first fragment).
# NOTE(review): the assertion call and several fragments of the expected
# literal are missing from this copy of the file -- confirm against the
# full source.
197 def test_reversed_toom_2_5_repr(self
):
198 TOOM_2_5
= ToomCookInstance
.make_toom_2_5().reversed()
199 # print(repr(repr(TOOM_2_5)))
202 "ToomCookInstance(lhs_part_count=2, rhs_part_count=3, "
203 "eval_points=(0, 1, -1, POINT_AT_INFINITY), lhs_eval_ops=("
204 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
206 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
208 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
209 "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(1, 1)})), "
211 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
213 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
214 "poly=EvalOpPoly({0: Fraction(1, 1), 1: Fraction(-1, 1)})), "
215 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)}))),"
217 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
218 "EvalOpAdd(lhs=EvalOpAdd(lhs="
219 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
221 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
222 "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), rhs="
223 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
225 "{0: Fraction(1, 1), 1: Fraction(1, 1), 2: Fraction(1, 1)})), "
226 "EvalOpSub(lhs=EvalOpAdd(lhs="
227 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
229 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
230 "poly=EvalOpPoly({0: Fraction(1, 1), 2: Fraction(1, 1)})), rhs="
231 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
233 "{0: Fraction(1, 1), 1: Fraction(-1, 1), 2: Fraction(1, 1)})), "
234 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)}))),"
236 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
237 "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpSub(lhs="
238 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
240 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
241 "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(-1, 1)})), "
243 "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(-1, 2)})), rhs="
244 "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)})), "
246 "{1: Fraction(1, 2), 2: Fraction(-1, 2), 3: Fraction(-1, 1)})), "
247 "EvalOpSub(lhs=EvalOpExactDiv(lhs=EvalOpAdd(lhs="
248 "EvalOpInput(lhs=1, rhs=0, poly=EvalOpPoly({1: Fraction(1, 1)})), "
250 "EvalOpInput(lhs=2, rhs=0, poly=EvalOpPoly({2: Fraction(1, 1)})), "
251 "poly=EvalOpPoly({1: Fraction(1, 1), 2: Fraction(1, 1)})), rhs=2, "
252 "poly=EvalOpPoly({1: Fraction(1, 2), 2: Fraction(1, 2)})), rhs="
253 "EvalOpInput(lhs=0, rhs=0, poly=EvalOpPoly({0: Fraction(1, 1)})), "
255 "{0: Fraction(-1, 1), 1: Fraction(1, 2), 2: Fraction(1, 2)})), "
256 "EvalOpInput(lhs=3, rhs=0, poly=EvalOpPoly({3: Fraction(1, 1)}))))"
def test_simple_mul_192x192_pre_ra_sim(self):
    """Run the shared 192x192 simulation check for every signedness pair.

    Calls `tst_simple_mul_192x192_sim` once per (lhs_signed, rhs_signed)
    combination, in the order (False, False), (False, True),
    (True, False), (True, True), always passing
    `get_pre_ra_state_factory` as the state-factory getter.
    """
    signedness_options = (False, True)
    # Outer choice is lhs, inner choice is rhs -- same visiting order as
    # two nested loops.
    combinations = [
        (lhs_choice, rhs_choice)
        for lhs_choice in signedness_options
        for rhs_choice in signedness_options
    ]
    for lhs_choice, rhs_choice in combinations:
        self.tst_simple_mul_192x192_sim(
            lhs_signed=lhs_choice,
            rhs_signed=rhs_choice,
            get_state_factory=get_pre_ra_state_factory,
        )
def test_simple_mul_192x192_post_ra_sim(self):
    """Run the shared 192x192 simulation check for every signedness pair.

    Calls `tst_simple_mul_192x192_sim` once per (lhs_signed, rhs_signed)
    combination, in the order (False, False), (False, True),
    (True, False), (True, True), always passing
    `get_post_ra_state_factory` as the state-factory getter.
    """
    signedness_options = (False, True)
    # Outer choice is lhs, inner choice is rhs -- same visiting order as
    # two nested loops.
    combinations = [
        (lhs_choice, rhs_choice)
        for lhs_choice in signedness_options
        for rhs_choice in signedness_options
    ]
    for lhs_choice, rhs_choice in combinations:
        self.tst_simple_mul_192x192_sim(
            lhs_signed=lhs_choice,
            rhs_signed=rhs_choice,
            get_state_factory=get_post_ra_state_factory,
        )
# Shared driver for the 192x192 -> 384-bit simulation tests: writes the two
# operands into simulated memory word by word, then reads the product back
# from the destination area and compares it with the expected value.
# NOTE(review): a number of lines are missing from this copy of the file
# (e.g. the end of the signature, the opening of the `Mul(` call that
# receives the lambda, the definition of `ptr_in`, the bodies of the two
# skip checks, and the loops that define `i`, `lhs`, `rhs` and `prod`);
# comments below describe only what is visible.
273 def tst_simple_mul_192x192_sim(
274 self
, lhs_signed
, # type: bool
275 rhs_signed
, # type: bool
276 get_state_factory
, # type: Callable[[Mul], _StateFactory]
279 # 0x000191acb262e15b_4c6b5f2b19e1a53e_821a2342132c5b57
280 # * 0x4a37c0567bcbab53_cf1f597598194ae6_208a49071aeec507
282 # int("0x00074736574206e_6f69746163696c70"
283 # "_69746c756d207469_622d3438333e2d32"
284 # "_3931783239312079_7261727469627261", base=0)
285 # == int.from_bytes(b"arbitrary 192x192->384-bit multiplication test",
# Test operands; prod_value is the little-endian integer value of the byte
# string below.
287 lhs_value
= 0x000191acb262e15b_4c6b5f2b19e1a53e_821a2342132c5b57
288 rhs_value
= 0x4a37c0567bcbab53_cf1f597598194ae6_208a49071aeec507
289 prod_value
= int.from_bytes(
290 b
"arbitrary 192x192->384-bit multiplication test", 'little')
# Sanity-check the constants themselves before simulating anything.
291 self
.assertEqual(lhs_value
* rhs_value
, prod_value
)
# `mul` wraps simple_mul with the requested signedness and returns a
# (result, None) tuple; both operands are 3 words.
293 mul
=lambda fn
, lhs
, rhs
: (simple_mul(
294 fn
=fn
, lhs
=lhs
, lhs_signed
=lhs_signed
,
295 rhs
=rhs
, rhs_signed
=rhs_signed
, name
="mul"), None),
296 lhs_size_in_words
=3, rhs_size_in_words
=3)
297 state_factory
= get_state_factory(code
)
# Addresses of the result and operand areas, relative to ptr_in.
299 dest_ptr
= ptr_in
+ code
.dest_offset
300 lhs_ptr
= ptr_in
+ code
.lhs_offset
301 rhs_ptr
= ptr_in
+ code
.rhs_offset
302 for lhs_neg
in False, True:
303 for rhs_neg
in False, True:
# A negated operand combined with an unsigned flag is special-cased;
# presumably skipped -- the branch bodies are not visible here.
304 if lhs_neg
and not lhs_signed
:
306 if rhs_neg
and not rhs_signed
:
308 with self
.subTest(lhs_signed
=lhs_signed
,
309 rhs_signed
=rhs_signed
,
310 lhs_neg
=lhs_neg
, rhs_neg
=rhs_neg
):
311 with
state_factory() as state
:
312 state
[code
.ptr_in
] = ptr_in
,
# Extract word i of lhs (shift by GPR_SIZE_IN_BITS * i, mask with
# GPR_VALUE_MASK) and store it at lhs_ptr + i * GPR_SIZE_IN_BYTES.
320 v
= (lhs
>> GPR_SIZE_IN_BITS
* i
) & GPR_VALUE_MASK
321 state
.store(lhs_ptr
+ i
* GPR_SIZE_IN_BYTES
, v
)
# Same for rhs.
323 v
= (rhs
>> GPR_SIZE_IN_BITS
* i
) & GPR_VALUE_MASK
324 state
.store(rhs_ptr
+ i
* GPR_SIZE_IN_BYTES
, v
)
# When exactly one operand is negated, the expected product becomes
# 2 ** 384 - prod_value.
326 expected
= prod_value
327 if lhs_neg
!= rhs_neg
:
328 expected
= 2 ** 384 - expected
# Reassemble the product from the words loaded at dest_ptr.
331 v
= state
.load(dest_ptr
+ GPR_SIZE_IN_BYTES
* i
)
332 prod
+= v
<< (GPR_SIZE_IN_BITS
* i
)
# Compare the hex renderings of the two values.
333 self
.assertEqual(hex(prod
), hex(expected
))
# Builds a Mul using simple_umul with 3-word operands; the adjacent string
# fragments are presumably the expected textual dump of the generated ops.
# NOTE(review): the assertion call and many fragments of the expected
# literal (mostly the op-name label lines) are missing from this copy of
# the file -- confirm against the full source.
335 def test_simple_mul_192x192_ops(self
):
336 code
= Mul(mul
=simple_umul
, lhs_size_in_words
=3, rhs_size_in_words
=3)
341 " (<...outputs[0]: <I64>>) <= FuncArgR3\n"
343 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
345 " (<...outputs[0]: <I64*3>>) <= SvLd(\n"
346 " <ptr_in.outputs[0]: <I64>>,\n"
347 " <lhs_setvl.outputs[0]: <VL_MAXVL>>, 0x30)\n"
349 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
351 " (<...outputs[0]: <I64*3>>) <= SvLd(\n"
352 " <ptr_in.outputs[0]: <I64>>,\n"
353 " <rhs_setvl.outputs[0]: <VL_MAXVL>>, 0x48)\n"
355 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
357 " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
358 " <...outputs[2]: <I64>>) <= Spread(\n"
359 " <load_rhs.outputs[0]: <I64*3>>,\n"
360 " <mul_rhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
362 " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
364 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x3)\n"
366 " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
368 " (<...outputs[0]: <I64*3>>, <...outputs[1]: <I64>>\n"
369 " ) <= SvMAddEDU(<load_lhs.outputs[0]: <I64*3>>,\n"
370 " <mul_rhs_spread.outputs[0]: <I64>>,\n"
371 " <mul_zero.outputs[0]: <I64>>,\n"
372 " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
373 "mul_0_mul_rt_spread:\n"
374 " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
375 " <...outputs[2]: <I64>>) <= Spread(\n"
376 " <mul_0_mul.outputs[0]: <I64*3>>,\n"
377 " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
379 " (<...outputs[0]: <I64*3>>, <...outputs[1]: <I64>>\n"
380 " ) <= SvMAddEDU(<load_lhs.outputs[0]: <I64*3>>,\n"
381 " <mul_rhs_spread.outputs[1]: <I64>>,\n"
382 " <mul_zero.outputs[0]: <I64>>,\n"
383 " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
384 "mul_1_mul_rt_spread:\n"
385 " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
386 " <...outputs[2]: <I64>>) <= Spread(\n"
387 " <mul_1_mul.outputs[0]: <I64*3>>,\n"
388 " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
389 "mul_1_cast_retval_zero:\n"
390 " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
391 "mul_1_cast_pp_zero:\n"
392 " (<...outputs[0]: <I64>>) <= LI(0x0)\n"
394 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x5)\n"
395 "mul_1_retval_concat:\n"
396 " (<...outputs[0]: <I64*5>>) <= Concat(\n"
397 " <mul_0_mul_rt_spread.outputs[1]: <I64>>,\n"
398 " <mul_0_mul_rt_spread.outputs[2]: <I64>>,\n"
399 " <mul_0_mul.outputs[1]: <I64>>,\n"
400 " <mul_1_cast_retval_zero.outputs[0]: <I64>>,\n"
401 " <mul_1_cast_retval_zero.outputs[0]: <I64>>,\n"
402 " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
404 " (<...outputs[0]: <I64*5>>) <= Concat(\n"
405 " <mul_1_mul_rt_spread.outputs[0]: <I64>>,\n"
406 " <mul_1_mul_rt_spread.outputs[1]: <I64>>,\n"
407 " <mul_1_mul_rt_spread.outputs[2]: <I64>>,\n"
408 " <mul_1_mul.outputs[1]: <I64>>,\n"
409 " <mul_1_cast_pp_zero.outputs[0]: <I64>>,\n"
410 " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
412 " (<...outputs[0]: <CA>>) <= ClearCA\n"
414 " (<...outputs[0]: <I64*5>>, <...outputs[1]: <CA>>\n"
415 " ) <= SvAddE(<mul_1_retval_concat.outputs[0]: <I64*5>>,\n"
416 " <mul_1_pp_concat.outputs[0]: <I64*5>>,\n"
417 " <mul_1_clear_ca.outputs[0]: <CA>>,\n"
418 " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
419 "mul_1_sum_spread:\n"
420 " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
421 " <...outputs[2]: <I64>>, <...outputs[3]: <I64>>,\n"
422 " <...outputs[4]: <I64>>) <= Spread(\n"
423 " <mul_1_add.outputs[0]: <I64*5>>,\n"
424 " <mul_1_setvl.outputs[0]: <VL_MAXVL>>)\n"
426 " (<...outputs[0]: <I64*3>>, <...outputs[1]: <I64>>\n"
427 " ) <= SvMAddEDU(<load_lhs.outputs[0]: <I64*3>>,\n"
428 " <mul_rhs_spread.outputs[2]: <I64>>,\n"
429 " <mul_zero.outputs[0]: <I64>>,\n"
430 " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
431 "mul_2_mul_rt_spread:\n"
432 " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
433 " <...outputs[2]: <I64>>) <= Spread(\n"
434 " <mul_2_mul.outputs[0]: <I64*3>>,\n"
435 " <mul_lhs_setvl.outputs[0]: <VL_MAXVL>>)\n"
437 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x4)\n"
438 "mul_2_retval_concat:\n"
439 " (<...outputs[0]: <I64*4>>) <= Concat(\n"
440 " <mul_1_sum_spread.outputs[1]: <I64>>,\n"
441 " <mul_1_sum_spread.outputs[2]: <I64>>,\n"
442 " <mul_1_sum_spread.outputs[3]: <I64>>,\n"
443 " <mul_1_sum_spread.outputs[4]: <I64>>,\n"
444 " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
446 " (<...outputs[0]: <I64*4>>) <= Concat(\n"
447 " <mul_2_mul_rt_spread.outputs[0]: <I64>>,\n"
448 " <mul_2_mul_rt_spread.outputs[1]: <I64>>,\n"
449 " <mul_2_mul_rt_spread.outputs[2]: <I64>>,\n"
450 " <mul_2_mul.outputs[1]: <I64>>,\n"
451 " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
453 " (<...outputs[0]: <CA>>) <= ClearCA\n"
455 " (<...outputs[0]: <I64*4>>, <...outputs[1]: <CA>>\n"
456 " ) <= SvAddE(<mul_2_retval_concat.outputs[0]: <I64*4>>,\n"
457 " <mul_2_pp_concat.outputs[0]: <I64*4>>,\n"
458 " <mul_2_clear_ca.outputs[0]: <CA>>,\n"
459 " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
460 "mul_2_sum_spread:\n"
461 " (<...outputs[0]: <I64>>, <...outputs[1]: <I64>>,\n"
462 " <...outputs[2]: <I64>>, <...outputs[3]: <I64>>) <= Spread(\n"
463 " <mul_2_add.outputs[0]: <I64*4>>,\n"
464 " <mul_2_setvl.outputs[0]: <VL_MAXVL>>)\n"
466 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x6)\n"
468 " (<...outputs[0]: <I64*6>>) <= Concat(\n"
469 " <mul_0_mul_rt_spread.outputs[0]: <I64>>,\n"
470 " <mul_1_sum_spread.outputs[0]: <I64>>,\n"
471 " <mul_2_sum_spread.outputs[0]: <I64>>,\n"
472 " <mul_2_sum_spread.outputs[1]: <I64>>,\n"
473 " <mul_2_sum_spread.outputs[2]: <I64>>,\n"
474 " <mul_2_sum_spread.outputs[3]: <I64>>,\n"
475 " <mul_setvl.outputs[0]: <VL_MAXVL>>)\n"
477 " (<...outputs[0]: <VL_MAXVL>>) <= SetVLI(0x6)\n"
479 " SvStd(<mul_concat.outputs[0]: <I64*6>>,\n"
480 " <ptr_in.outputs[0]: <I64>>,\n"
481 " <dest_setvl.outputs[0]: <VL_MAXVL>>, 0x0)"
484 def test_simple_mul_192x192_reg_alloc(self
):
485 code
= Mul(mul
=simple_umul
, lhs_size_in_words
=3, rhs_size_in_words
=3)
487 assigned_registers
= allocate_registers(fn
)
489 repr(assigned_registers
), "{"
490 "<store_dest.inp2.setvl.outputs[0]: <VL_MAXVL>>: "
491 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
492 "<store_dest.inp1.copy.outputs[0]: <I64>>: "
493 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
494 "<store_dest.inp0.copy.outputs[0]: <I64*6>>: "
495 "Loc(kind=LocKind.GPR, start=4, reg_len=6), "
496 "<store_dest.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
497 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
498 "<dest_setvl.outputs[0]: <VL_MAXVL>>: "
499 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
500 "<mul_concat.out0.copy.outputs[0]: <I64*6>>: "
501 "Loc(kind=LocKind.GPR, start=3, reg_len=6), "
502 "<mul_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
503 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
504 "<mul_concat.outputs[0]: <I64*6>>: "
505 "Loc(kind=LocKind.GPR, start=3, reg_len=6), "
506 "<mul_concat.inp0.copy.outputs[0]: <I64>>: "
507 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
508 "<mul_concat.inp1.copy.outputs[0]: <I64>>: "
509 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
510 "<mul_concat.inp2.copy.outputs[0]: <I64>>: "
511 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
512 "<mul_concat.inp3.copy.outputs[0]: <I64>>: "
513 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
514 "<mul_concat.inp4.copy.outputs[0]: <I64>>: "
515 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
516 "<mul_concat.inp5.copy.outputs[0]: <I64>>: "
517 "Loc(kind=LocKind.GPR, start=8, reg_len=1), "
518 "<mul_concat.inp6.setvl.outputs[0]: <VL_MAXVL>>: "
519 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
520 "<mul_setvl.outputs[0]: <VL_MAXVL>>: "
521 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
522 "<mul_2_sum_spread.out3.copy.outputs[0]: <I64>>: "
523 "Loc(kind=LocKind.GPR, start=9, reg_len=1), "
524 "<mul_2_sum_spread.out2.copy.outputs[0]: <I64>>: "
525 "Loc(kind=LocKind.GPR, start=10, reg_len=1), "
526 "<mul_2_sum_spread.out1.copy.outputs[0]: <I64>>: "
527 "Loc(kind=LocKind.GPR, start=11, reg_len=1), "
528 "<mul_2_sum_spread.out0.copy.outputs[0]: <I64>>: "
529 "Loc(kind=LocKind.GPR, start=12, reg_len=1), "
530 "<mul_2_sum_spread.outputs[0]: <I64>>: "
531 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
532 "<mul_2_sum_spread.outputs[1]: <I64>>: "
533 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
534 "<mul_2_sum_spread.outputs[2]: <I64>>: "
535 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
536 "<mul_2_sum_spread.outputs[3]: <I64>>: "
537 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
538 "<mul_2_sum_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
539 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
540 "<mul_2_sum_spread.inp0.copy.outputs[0]: <I64*4>>: "
541 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
542 "<mul_2_sum_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
543 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
544 "<mul_2_add.out0.copy.outputs[0]: <I64*4>>: "
545 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
546 "<mul_2_add.out0.setvl.outputs[0]: <VL_MAXVL>>: "
547 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
548 "<mul_2_clear_ca.outputs[0]: <CA>>: "
549 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
550 "<mul_2_add.outputs[1]: <CA>>: "
551 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
552 "<mul_2_add.outputs[0]: <I64*4>>: "
553 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
554 "<mul_2_add.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
555 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
556 "<mul_2_add.inp1.copy.outputs[0]: <I64*4>>: "
557 "Loc(kind=LocKind.GPR, start=7, reg_len=4), "
558 "<mul_2_add.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
559 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
560 "<mul_2_add.inp0.copy.outputs[0]: <I64*4>>: "
561 "Loc(kind=LocKind.GPR, start=14, reg_len=4), "
562 "<mul_2_add.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
563 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
564 "<mul_2_pp_concat.out0.copy.outputs[0]: <I64*4>>: "
565 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
566 "<mul_2_pp_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
567 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
568 "<mul_2_pp_concat.outputs[0]: <I64*4>>: "
569 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
570 "<mul_2_pp_concat.inp0.copy.outputs[0]: <I64>>: "
571 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
572 "<mul_2_pp_concat.inp1.copy.outputs[0]: <I64>>: "
573 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
574 "<mul_2_pp_concat.inp2.copy.outputs[0]: <I64>>: "
575 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
576 "<mul_2_pp_concat.inp3.copy.outputs[0]: <I64>>: "
577 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
578 "<mul_2_pp_concat.inp4.setvl.outputs[0]: <VL_MAXVL>>: "
579 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
580 "<mul_2_retval_concat.out0.copy.outputs[0]: <I64*4>>: "
581 "Loc(kind=LocKind.GPR, start=7, reg_len=4), "
582 "<mul_2_retval_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
583 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
584 "<mul_2_retval_concat.outputs[0]: <I64*4>>: "
585 "Loc(kind=LocKind.GPR, start=3, reg_len=4), "
586 "<mul_2_retval_concat.inp0.copy.outputs[0]: <I64>>: "
587 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
588 "<mul_2_retval_concat.inp1.copy.outputs[0]: <I64>>: "
589 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
590 "<mul_2_retval_concat.inp2.copy.outputs[0]: <I64>>: "
591 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
592 "<mul_2_retval_concat.inp3.copy.outputs[0]: <I64>>: "
593 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
594 "<mul_2_retval_concat.inp4.setvl.outputs[0]: <VL_MAXVL>>: "
595 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
596 "<mul_2_setvl.outputs[0]: <VL_MAXVL>>: "
597 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
598 "<mul_2_mul_rt_spread.out2.copy.outputs[0]: <I64>>: "
599 "Loc(kind=LocKind.GPR, start=11, reg_len=1), "
600 "<mul_2_mul_rt_spread.out1.copy.outputs[0]: <I64>>: "
601 "Loc(kind=LocKind.GPR, start=12, reg_len=1), "
602 "<mul_2_mul_rt_spread.out0.copy.outputs[0]: <I64>>: "
603 "Loc(kind=LocKind.GPR, start=14, reg_len=1), "
604 "<mul_2_mul_rt_spread.outputs[0]: <I64>>: "
605 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
606 "<mul_2_mul_rt_spread.outputs[1]: <I64>>: "
607 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
608 "<mul_2_mul_rt_spread.outputs[2]: <I64>>: "
609 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
610 "<mul_2_mul_rt_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
611 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
612 "<mul_2_mul_rt_spread.inp0.copy.outputs[0]: <I64*3>>: "
613 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
614 "<mul_2_mul_rt_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
615 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
616 "<mul_2_mul.out1.copy.outputs[0]: <I64>>: "
617 "Loc(kind=LocKind.GPR, start=15, reg_len=1), "
618 "<mul_2_mul.out0.copy.outputs[0]: <I64*3>>: "
619 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
620 "<mul_2_mul.out0.setvl.outputs[0]: <VL_MAXVL>>: "
621 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
622 "<mul_2_mul.inp2.copy.outputs[0]: <I64>>: "
623 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
624 "<mul_2_mul.outputs[1]: <I64>>: "
625 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
626 "<mul_2_mul.outputs[0]: <I64*3>>: "
627 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
628 "<mul_2_mul.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
629 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
630 "<mul_2_mul.inp1.copy.outputs[0]: <I64>>: "
631 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
632 "<mul_2_mul.inp0.copy.outputs[0]: <I64*3>>: "
633 "Loc(kind=LocKind.GPR, start=8, reg_len=3), "
634 "<mul_2_mul.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
635 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
636 "<mul_1_sum_spread.out4.copy.outputs[0]: <I64>>: "
637 "Loc(kind=LocKind.GPR, start=16, reg_len=1), "
638 "<mul_1_sum_spread.out3.copy.outputs[0]: <I64>>: "
639 "Loc(kind=LocKind.GPR, start=17, reg_len=1), "
640 "<mul_1_sum_spread.out2.copy.outputs[0]: <I64>>: "
641 "Loc(kind=LocKind.GPR, start=18, reg_len=1), "
642 "<mul_1_sum_spread.out1.copy.outputs[0]: <I64>>: "
643 "Loc(kind=LocKind.GPR, start=19, reg_len=1), "
644 "<mul_1_sum_spread.out0.copy.outputs[0]: <I64>>: "
645 "Loc(kind=LocKind.GPR, start=20, reg_len=1), "
646 "<mul_1_sum_spread.outputs[0]: <I64>>: "
647 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
648 "<mul_1_sum_spread.outputs[1]: <I64>>: "
649 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
650 "<mul_1_sum_spread.outputs[2]: <I64>>: "
651 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
652 "<mul_1_sum_spread.outputs[3]: <I64>>: "
653 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
654 "<mul_1_sum_spread.outputs[4]: <I64>>: "
655 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
656 "<mul_1_sum_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
657 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
658 "<mul_1_sum_spread.inp0.copy.outputs[0]: <I64*5>>: "
659 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
660 "<mul_1_sum_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
661 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
662 "<mul_1_add.out0.copy.outputs[0]: <I64*5>>: "
663 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
664 "<mul_1_add.out0.setvl.outputs[0]: <VL_MAXVL>>: "
665 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
666 "<mul_1_clear_ca.outputs[0]: <CA>>: "
667 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
668 "<mul_1_add.outputs[1]: <CA>>: "
669 "Loc(kind=LocKind.CA, start=0, reg_len=1), "
670 "<mul_1_add.outputs[0]: <I64*5>>: "
671 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
672 "<mul_1_add.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
673 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
674 "<mul_1_add.inp1.copy.outputs[0]: <I64*5>>: "
675 "Loc(kind=LocKind.GPR, start=8, reg_len=5), "
676 "<mul_1_add.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
677 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
678 "<mul_1_add.inp0.copy.outputs[0]: <I64*5>>: "
679 "Loc(kind=LocKind.GPR, start=14, reg_len=5), "
680 "<mul_1_add.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
681 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
682 "<mul_1_pp_concat.out0.copy.outputs[0]: <I64*5>>: "
683 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
684 "<mul_1_pp_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
685 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
686 "<mul_1_pp_concat.outputs[0]: <I64*5>>: "
687 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
688 "<mul_1_pp_concat.inp0.copy.outputs[0]: <I64>>: "
689 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
690 "<mul_1_pp_concat.inp1.copy.outputs[0]: <I64>>: "
691 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
692 "<mul_1_pp_concat.inp2.copy.outputs[0]: <I64>>: "
693 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
694 "<mul_1_pp_concat.inp3.copy.outputs[0]: <I64>>: "
695 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
696 "<mul_1_pp_concat.inp4.copy.outputs[0]: <I64>>: "
697 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
698 "<mul_1_pp_concat.inp5.setvl.outputs[0]: <VL_MAXVL>>: "
699 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
700 "<mul_1_retval_concat.out0.copy.outputs[0]: <I64*5>>: "
701 "Loc(kind=LocKind.GPR, start=8, reg_len=5), "
702 "<mul_1_retval_concat.out0.setvl.outputs[0]: <VL_MAXVL>>: "
703 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
704 "<mul_1_retval_concat.outputs[0]: <I64*5>>: "
705 "Loc(kind=LocKind.GPR, start=3, reg_len=5), "
706 "<mul_1_retval_concat.inp0.copy.outputs[0]: <I64>>: "
707 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
708 "<mul_1_retval_concat.inp1.copy.outputs[0]: <I64>>: "
709 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
710 "<mul_1_retval_concat.inp2.copy.outputs[0]: <I64>>: "
711 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
712 "<mul_1_retval_concat.inp3.copy.outputs[0]: <I64>>: "
713 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
714 "<mul_1_retval_concat.inp4.copy.outputs[0]: <I64>>: "
715 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
716 "<mul_1_retval_concat.inp5.setvl.outputs[0]: <VL_MAXVL>>: "
717 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
718 "<mul_1_setvl.outputs[0]: <VL_MAXVL>>: "
719 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
720 "<mul_1_cast_pp_zero.out0.copy.outputs[0]: <I64>>: "
721 "Loc(kind=LocKind.GPR, start=14, reg_len=1), "
722 "<mul_1_cast_pp_zero.outputs[0]: <I64>>: "
723 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
724 "<mul_1_cast_retval_zero.out0.copy.outputs[0]: <I64>>: "
725 "Loc(kind=LocKind.GPR, start=8, reg_len=1), "
726 "<mul_1_cast_retval_zero.outputs[0]: <I64>>: "
727 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
728 "<mul_1_mul_rt_spread.out2.copy.outputs[0]: <I64>>: "
729 "Loc(kind=LocKind.GPR, start=15, reg_len=1), "
730 "<mul_1_mul_rt_spread.out1.copy.outputs[0]: <I64>>: "
731 "Loc(kind=LocKind.GPR, start=16, reg_len=1), "
732 "<mul_1_mul_rt_spread.out0.copy.outputs[0]: <I64>>: "
733 "Loc(kind=LocKind.GPR, start=17, reg_len=1), "
734 "<mul_1_mul_rt_spread.outputs[0]: <I64>>: "
735 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
736 "<mul_1_mul_rt_spread.outputs[1]: <I64>>: "
737 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
738 "<mul_1_mul_rt_spread.outputs[2]: <I64>>: "
739 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
740 "<mul_1_mul_rt_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
741 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
742 "<mul_1_mul_rt_spread.inp0.copy.outputs[0]: <I64*3>>: "
743 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
744 "<mul_1_mul_rt_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
745 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
746 "<mul_1_mul.out1.copy.outputs[0]: <I64>>: "
747 "Loc(kind=LocKind.GPR, start=18, reg_len=1), "
748 "<mul_1_mul.out0.copy.outputs[0]: <I64*3>>: "
749 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
750 "<mul_1_mul.out0.setvl.outputs[0]: <VL_MAXVL>>: "
751 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
752 "<mul_1_mul.inp2.copy.outputs[0]: <I64>>: "
753 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
754 "<mul_1_mul.outputs[1]: <I64>>: "
755 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
756 "<mul_1_mul.outputs[0]: <I64*3>>: "
757 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
758 "<mul_1_mul.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
759 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
760 "<mul_1_mul.inp1.copy.outputs[0]: <I64>>: "
761 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
762 "<mul_1_mul.inp0.copy.outputs[0]: <I64*3>>: "
763 "Loc(kind=LocKind.GPR, start=8, reg_len=3), "
764 "<mul_1_mul.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
765 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
766 "<mul_0_mul_rt_spread.out2.copy.outputs[0]: <I64>>: "
767 "Loc(kind=LocKind.GPR, start=11, reg_len=1), "
768 "<mul_0_mul_rt_spread.out1.copy.outputs[0]: <I64>>: "
769 "Loc(kind=LocKind.GPR, start=12, reg_len=1), "
770 "<mul_0_mul_rt_spread.out0.copy.outputs[0]: <I64>>: "
771 "Loc(kind=LocKind.GPR, start=21, reg_len=1), "
772 "<mul_0_mul_rt_spread.outputs[0]: <I64>>: "
773 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
774 "<mul_0_mul_rt_spread.outputs[1]: <I64>>: "
775 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
776 "<mul_0_mul_rt_spread.outputs[2]: <I64>>: "
777 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
778 "<mul_0_mul_rt_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
779 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
780 "<mul_0_mul_rt_spread.inp0.copy.outputs[0]: <I64*3>>: "
781 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
782 "<mul_0_mul_rt_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
783 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
784 "<mul_0_mul.out1.copy.outputs[0]: <I64>>: "
785 "Loc(kind=LocKind.GPR, start=19, reg_len=1), "
786 "<mul_0_mul.out0.copy.outputs[0]: <I64*3>>: "
787 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
788 "<mul_0_mul.out0.setvl.outputs[0]: <VL_MAXVL>>: "
789 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
790 "<mul_0_mul.inp2.copy.outputs[0]: <I64>>: "
791 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
792 "<mul_0_mul.outputs[1]: <I64>>: "
793 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
794 "<mul_0_mul.outputs[0]: <I64*3>>: "
795 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
796 "<mul_0_mul.inp3.setvl.outputs[0]: <VL_MAXVL>>: "
797 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
798 "<mul_0_mul.inp1.copy.outputs[0]: <I64>>: "
799 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
800 "<mul_0_mul.inp0.copy.outputs[0]: <I64*3>>: "
801 "Loc(kind=LocKind.GPR, start=8, reg_len=3), "
802 "<mul_0_mul.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
803 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
804 "<mul_zero2.out0.copy.outputs[0]: <I64>>: "
805 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
806 "<mul_zero2.outputs[0]: <I64>>: "
807 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
808 "<mul_lhs_setvl.outputs[0]: <VL_MAXVL>>: "
809 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
810 "<mul_zero.out0.copy.outputs[0]: <I64>>: "
811 "Loc(kind=LocKind.GPR, start=22, reg_len=1), "
812 "<mul_zero.outputs[0]: <I64>>: "
813 "Loc(kind=LocKind.GPR, start=3, reg_len=1), "
814 "<mul_rhs_spread.out2.copy.outputs[0]: <I64>>: "
815 "Loc(kind=LocKind.GPR, start=23, reg_len=1), "
816 "<mul_rhs_spread.out1.copy.outputs[0]: <I64>>: "
817 "Loc(kind=LocKind.GPR, start=14, reg_len=1), "
818 "<mul_rhs_spread.out0.copy.outputs[0]: <I64>>: "
819 "Loc(kind=LocKind.GPR, start=4, reg_len=1), "
820 "<mul_rhs_spread.outputs[0]: <I64>>: "
821 "Loc(kind=LocKind.GPR, start=5, reg_len=1), "
822 "<mul_rhs_spread.outputs[1]: <I64>>: "
823 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
824 "<mul_rhs_spread.outputs[2]: <I64>>: "
825 "Loc(kind=LocKind.GPR, start=7, reg_len=1), "
826 "<mul_rhs_spread.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
827 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
828 "<mul_rhs_spread.inp0.copy.outputs[0]: <I64*3>>: "
829 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
830 "<mul_rhs_spread.inp0.setvl.outputs[0]: <VL_MAXVL>>: "
831 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
832 "<mul_rhs_setvl.outputs[0]: <VL_MAXVL>>: "
833 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
834 "<load_rhs.out0.copy.outputs[0]: <I64*3>>: "
835 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
836 "<load_rhs.out0.setvl.outputs[0]: <VL_MAXVL>>: "
837 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
838 "<load_rhs.outputs[0]: <I64*3>>: "
839 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
840 "<load_rhs.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
841 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
842 "<load_rhs.inp0.copy.outputs[0]: <I64>>: "
843 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
844 "<rhs_setvl.outputs[0]: <VL_MAXVL>>: "
845 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
846 "<load_lhs.out0.copy.outputs[0]: <I64*3>>: "
847 "Loc(kind=LocKind.GPR, start=24, reg_len=3), "
848 "<load_lhs.out0.setvl.outputs[0]: <VL_MAXVL>>: "
849 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
850 "<load_lhs.outputs[0]: <I64*3>>: "
851 "Loc(kind=LocKind.GPR, start=3, reg_len=3), "
852 "<load_lhs.inp1.setvl.outputs[0]: <VL_MAXVL>>: "
853 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
854 "<load_lhs.inp0.copy.outputs[0]: <I64>>: "
855 "Loc(kind=LocKind.GPR, start=6, reg_len=1), "
856 "<lhs_setvl.outputs[0]: <VL_MAXVL>>: "
857 "Loc(kind=LocKind.VL_MAXVL, start=0, reg_len=1), "
858 "<ptr_in.out0.copy.outputs[0]: <I64>>: "
859 "Loc(kind=LocKind.GPR, start=27, reg_len=1), "
860 "<ptr_in.outputs[0]: <I64>>: "
861 "Loc(kind=LocKind.GPR, start=3, reg_len=1)"
864 def test_simple_mul_192x192_asm(self
):
865 code
= Mul(mul
=simple_umul
, lhs_size_in_words
=3, rhs_size_in_words
=3)
867 assigned_registers
= allocate_registers(fn
)
868 gen_asm_state
= GenAsmState(assigned_registers
)
869 fn
.gen_asm(gen_asm_state
)
870 self
.assertEqual(gen_asm_state
.output
, [
872 'setvl 0, 0, 3, 0, 1, 1',
874 'setvl 0, 0, 3, 0, 1, 1',
876 'setvl 0, 0, 3, 0, 1, 1',
878 'setvl 0, 0, 3, 0, 1, 1',
880 'setvl 0, 0, 3, 0, 1, 1',
882 'setvl 0, 0, 3, 0, 1, 1',
883 'setvl 0, 0, 3, 0, 1, 1',
884 'setvl 0, 0, 3, 0, 1, 1',
885 'setvl 0, 0, 3, 0, 1, 1',
886 'sv.or/mrr *5, *3, *3',
892 'setvl 0, 0, 3, 0, 1, 1',
894 'setvl 0, 0, 3, 0, 1, 1',
895 'sv.or *8, *24, *24',
898 'setvl 0, 0, 3, 0, 1, 1',
899 'sv.maddedu *3, *8, 7, 6',
900 'setvl 0, 0, 3, 0, 1, 1',
902 'setvl 0, 0, 3, 0, 1, 1',
903 'setvl 0, 0, 3, 0, 1, 1',
907 'setvl 0, 0, 3, 0, 1, 1',
908 'sv.or *8, *24, *24',
911 'setvl 0, 0, 3, 0, 1, 1',
912 'sv.maddedu *3, *8, 7, 6',
913 'setvl 0, 0, 3, 0, 1, 1',
915 'setvl 0, 0, 3, 0, 1, 1',
916 'setvl 0, 0, 3, 0, 1, 1',
924 'setvl 0, 0, 5, 0, 1, 1',
930 'setvl 0, 0, 5, 0, 1, 1',
931 'setvl 0, 0, 5, 0, 1, 1',
938 'setvl 0, 0, 5, 0, 1, 1',
939 'setvl 0, 0, 5, 0, 1, 1',
941 'setvl 0, 0, 5, 0, 1, 1',
943 'setvl 0, 0, 5, 0, 1, 1',
945 'setvl 0, 0, 5, 0, 1, 1',
946 'sv.adde *3, *14, *8',
947 'setvl 0, 0, 5, 0, 1, 1',
948 'setvl 0, 0, 5, 0, 1, 1',
949 'setvl 0, 0, 5, 0, 1, 1',
955 'setvl 0, 0, 3, 0, 1, 1',
956 'sv.or *8, *24, *24',
959 'setvl 0, 0, 3, 0, 1, 1',
960 'sv.maddedu *3, *8, 7, 6',
961 'setvl 0, 0, 3, 0, 1, 1',
963 'setvl 0, 0, 3, 0, 1, 1',
964 'setvl 0, 0, 3, 0, 1, 1',
968 'setvl 0, 0, 4, 0, 1, 1',
973 'setvl 0, 0, 4, 0, 1, 1',
974 'setvl 0, 0, 4, 0, 1, 1',
980 'setvl 0, 0, 4, 0, 1, 1',
981 'setvl 0, 0, 4, 0, 1, 1',
983 'setvl 0, 0, 4, 0, 1, 1',
985 'setvl 0, 0, 4, 0, 1, 1',
987 'setvl 0, 0, 4, 0, 1, 1',
988 'sv.adde *3, *14, *7',
989 'setvl 0, 0, 4, 0, 1, 1',
990 'setvl 0, 0, 4, 0, 1, 1',
991 'setvl 0, 0, 4, 0, 1, 1',
996 'setvl 0, 0, 6, 0, 1, 1',
1003 'setvl 0, 0, 6, 0, 1, 1',
1004 'setvl 0, 0, 6, 0, 1, 1',
1005 'setvl 0, 0, 6, 0, 1, 1',
1006 'setvl 0, 0, 6, 0, 1, 1',
1007 'sv.or/mrr *4, *3, *3',
1009 'setvl 0, 0, 6, 0, 1, 1',
# Build the `Mul` fixture used by the 256x256 tests: lhs and rhs are both
# 4 words long, and the multiplier callback handed to `Mul` constructs a
# `ToomCookMul` with the caller's `lhs_signed` / `rhs_signed` flags.
# NOTE(review): the statement that defines `instances` and the `return` of
# the inner `mul` callback are not visible in this excerpt -- confirm them
# against the full file before relying on this description.
1013 def toom_2_mul_256x256(self
, lhs_signed
, rhs_signed
):
1014 # type: (bool, bool) -> Mul
# Toom-2 instance obtained from ToomCookInstance.make_toom_2().
1015 TOOM_2
= ToomCookInstance
.make_toom_2()
# Multiplier callback passed to Mul as `mul`; it closes over lhs_signed,
# rhs_signed and instances from the enclosing method.
1018 def mul(fn
, lhs
, rhs
):
1019 # type: (Fn, SSAVal, SSAVal) -> tuple[SSAVal, ToomCookMul]
1020 v
= ToomCookMul(fn
=fn
, lhs
=lhs
, lhs_signed
=lhs_signed
, rhs
=rhs
,
1021 rhs_signed
=rhs_signed
, instances
=instances
)
# 4 words per operand.
1023 return Mul(mul
=mul
, lhs_size_in_words
=4, rhs_size_in_words
=4)
# Generate (lhs_value, rhs_value, prod_value) test vectors for the 256x256
# multiply tests.
#
# Candidate operands are `1 << n` for n in range(0, 256, 16) and
# range(15, 256, 16), plus 0 and one fixed value per side (lhs_value_in /
# rhs_value_in); negated copies of the candidates are appended as well.
# NOTE(review): the lines directly before each `.extend(...)` and the header
# of the `key` helper are not visible in this excerpt -- presumably the
# negation is guarded by the matching *_signed flag; confirm in the full file.
#
# For every lhs/rhs pair the operands are reduced mod 2**256; an operand with
# bit 255 set is reinterpreted as negative when its *_signed flag is true, the
# product is computed, and the yielded operands are again reduced mod 2**256
# while the yielded product is reduced mod 2**512.
1025 def make_256x256_mul_test_cases(self
, lhs_signed
, rhs_signed
):
1026 # type: (bool, bool) -> Iterator[tuple[int, int, int]]
1027 # test multiplying `+-1 << n` and:
1028 # 0xc162321a5eaad80b_4b86bb0efdfb93c0_a789ff04cc11b157_eaa08e29fb197621
1030 # 0x3138710167583371_998af336a8fac64d_e6da3737090787fe_85ba09ea701f4af2
1033 # "252e6e6f69746163_696c7069746c754d_"
1034 # "2061627573746172_614b202d20322d4d_"
1035 # "4f4f5420676e6973_75206c756d20746e_"
1036 # "6967696220746962_2d36353278363532", base=0)
1037 # == int.from_bytes(b'256x256-bit bigint mul using TOOM-2 '
1038 # b'- Karatsuba Multiplication.%', 'little')
# Each fixed operand is assembled from its upper and lower 128-bit halves.
1039 lhs_value_in
= (0xc162321a5eaad80b_4b86bb0efdfb93c0 << 128) \
1040 |
0xa789ff04cc11b157_eaa08e29fb197621
1041 rhs_value_in
= (0x3138710167583371_998af336a8fac64d << 128) \
1042 |
0xe6da3737090787fe_85ba09ea701f4af2
1043 prod_value_in
= int.from_bytes(
1044 b
'256x256-bit bigint mul using TOOM-2 '
1045 b
'- Karatsuba Multiplication.%', 'little')
# Sanity check of the fixed constants themselves: their product must equal
# the little-endian integer encoded by the byte string above.
1046 self
.assertEqual(lhs_value_in
* rhs_value_in
, prod_value_in
)
# Bit positions 0, 16, ..., 240 and 15, 31, ..., 255.
1047 shifts
= [*range(0, 256, 16), *range(15, 256, 16)]
1048 lhs_values
= [1 << i
for i
in shifts
] + [0, lhs_value_in
]
1049 rhs_values
= [1 << i
for i
in shifts
] + [0, rhs_value_in
]
# Append the negation of every candidate gathered so far.
1051 lhs_values
.extend([-i
for i
in lhs_values
])
1053 rhs_values
.extend([-i
for i
in rhs_values
])
# Sort key body: candidates whose magnitude is one of the fixed values sort
# last (True > False); ties are ordered by the value taken mod 2**256.
1056 # type: (int) -> tuple[bool, int]
1057 return abs(v
) in (lhs_value_in
, rhs_value_in
), v
% (1 << 256)
1059 lhs_values
.sort(key
=key
)
1060 rhs_values
.sort(key
=key
)
1061 for lhs_value
in lhs_values
:
1062 for rhs_value
in rhs_values
:
# Wrap both operands into the range [0, 2**256).
1063 lhs_value
%= 1 << 256
1064 rhs_value
%= 1 << 256
# With bit 255 set and the side flagged signed, reinterpret the operand as
# a negative number before multiplying.
1065 if lhs_value
>> 255 != 0 and lhs_signed
:
1066 lhs_value
-= 1 << 256
1067 if rhs_value
>> 255 != 0 and rhs_signed
:
1068 rhs_value
-= 1 << 256
1069 prod_value
= lhs_value
* rhs_value
# Reduce operands back to 256 bits and the product to 512 bits for yielding.
1070 lhs_value
%= 1 << 256
1071 rhs_value
%= 1 << 256
1072 prod_value
%= 1 << 512
1073 yield lhs_value
, rhs_value
, prod_value
# Shared body of the test_toom_2_mul_256x256_*_sim tests.
#
# Builds the multiply fixture via self.toom_2_mul_256x256(), prints
# code.retval[1] and the function's ops, then for every vector produced by
# self.make_256x256_mul_test_cases() opens a fresh state from the factory,
# stores the operand words at lhs_ptr / rhs_ptr, loads the result words from
# dest_ptr and asserts that the reassembled product equals the expected one.
# NOTE(review): several lines are not visible in this excerpt (the end of the
# signature, the definition of `ptr_in`, the `i` loop headers, whatever runs
# the code on `state`, and the initialisation of `prod`) -- confirm them
# against the full file.
1075 def tst_toom_2_mul_256x256_sim(
1076 self
, lhs_signed
, # type: bool
1077 rhs_signed
, # type: bool
1078 get_state_factory
, # type: Callable[[Mul], _StateFactory]
1080 code
= self
.toom_2_mul_256x256(
1081 lhs_signed
=lhs_signed
, rhs_signed
=rhs_signed
)
# Debug output: second element of code.retval and the textual op listing.
1082 print(code
.retval
[1])
1083 print(code
.fn
.ops_to_str())
1084 state_factory
= get_state_factory(code
)
# Addresses of the destination and the two operands, relative to ptr_in.
1086 dest_ptr
= ptr_in
+ code
.dest_offset
1087 lhs_ptr
= ptr_in
+ code
.lhs_offset
1088 rhs_ptr
= ptr_in
+ code
.rhs_offset
1089 values
= self
.make_256x256_mul_test_cases(
1090 lhs_signed
=lhs_signed
, rhs_signed
=rhs_signed
)
# One subTest per vector so a failure reports the offending operands in hex.
1091 for lhs_value
, rhs_value
, prod_value
in values
:
1092 with self
.subTest(lhs_signed
=lhs_signed
, rhs_signed
=rhs_signed
,
1093 lhs_value
=hex(lhs_value
),
1094 rhs_value
=hex(rhs_value
),
1095 prod_value
=hex(prod_value
)):
1096 with
state_factory() as state
:
# The trailing comma makes the stored value a 1-tuple containing ptr_in.
1097 state
[code
.ptr_in
] = ptr_in
,
# Word i of lhs: shift right by i words, then store at lhs_ptr + i words.
1099 v
= lhs_value
>> GPR_SIZE_IN_BITS
* i
1101 state
.store(lhs_ptr
+ i
* GPR_SIZE_IN_BYTES
, v
)
# Same for word i of rhs.
1103 v
= rhs_value
>> GPR_SIZE_IN_BITS
* i
1105 state
.store(rhs_ptr
+ i
* GPR_SIZE_IN_BYTES
, v
)
# Reassemble the product from the words loaded at dest_ptr.
1109 v
= state
.load(dest_ptr
+ GPR_SIZE_IN_BYTES
* i
)
1110 prod
+= v
<< (GPR_SIZE_IN_BITS
* i
)
# Compare as hex strings; the message includes the whole state.
1111 self
.assertEqual(hex(prod
), hex(prod_value
),
1112 f
"failed: state={state}")
def test_toom_2_mul_256x256_pre_ra_sim(self):
    """Run the shared 256x256 Toom-2 simulation check for all four
    lhs/rhs signedness combinations, using `get_pre_ra_state_factory`
    as the state factory.

    Combinations are visited in the order (False, False), (False, True),
    (True, False), (True, True).
    """
    flags = (False, True)
    combos = [(lhs_flag, rhs_flag) for lhs_flag in flags for rhs_flag in flags]
    for lhs_flag, rhs_flag in combos:
        self.tst_toom_2_mul_256x256_sim(
            lhs_signed=lhs_flag,
            rhs_signed=rhs_flag,
            get_state_factory=get_pre_ra_state_factory,
        )
def test_toom_2_mul_256x256_uu_post_ra_sim(self):
    """Run the shared 256x256 Toom-2 simulation check with both operands
    unsigned, using `get_post_ra_state_factory` as the state factory."""
    signedness = {"lhs_signed": False, "rhs_signed": False}
    self.tst_toom_2_mul_256x256_sim(
        get_state_factory=get_post_ra_state_factory, **signedness)
def test_toom_2_mul_256x256_su_post_ra_sim(self):
    """Run the shared 256x256 Toom-2 simulation check with a signed lhs and
    an unsigned rhs, using `get_post_ra_state_factory` as the state factory."""
    signedness = {"lhs_signed": True, "rhs_signed": False}
    self.tst_toom_2_mul_256x256_sim(
        get_state_factory=get_post_ra_state_factory, **signedness)
def test_toom_2_mul_256x256_us_post_ra_sim(self):
    """Run the shared 256x256 Toom-2 simulation check with an unsigned lhs
    and a signed rhs, using `get_post_ra_state_factory` as the state factory."""
    signedness = {"lhs_signed": False, "rhs_signed": True}
    self.tst_toom_2_mul_256x256_sim(
        get_state_factory=get_post_ra_state_factory, **signedness)
def test_toom_2_mul_256x256_ss_post_ra_sim(self):
    """Run the shared 256x256 Toom-2 simulation check with both operands
    signed, using `get_post_ra_state_factory` as the state factory."""
    signedness = {"lhs_signed": True, "rhs_signed": True}
    self.tst_toom_2_mul_256x256_sim(
        get_state_factory=get_post_ra_state_factory, **signedness)
1141 def test_toom_2_mul_256x256_asm(self
):
1142 code
= self
.toom_2_mul_256x256(lhs_signed
=False, rhs_signed
=False)
1144 assigned_registers
= allocate_registers(fn
)
1145 gen_asm_state
= GenAsmState(assigned_registers
)
1146 fn
.gen_asm(gen_asm_state
)
1147 self
.assertEqual(gen_asm_state
.output
, [
1149 'setvl 0, 0, 4, 0, 1, 1',
1151 'setvl 0, 0, 4, 0, 1, 1',
1153 'setvl 0, 0, 4, 0, 1, 1',
1155 'setvl 0, 0, 4, 0, 1, 1',
1157 'setvl 0, 0, 4, 0, 1, 1',
1159 'setvl 0, 0, 4, 0, 1, 1',
1160 'sv.or *14, *3, *3',
1161 'setvl 0, 0, 4, 0, 1, 1',
1162 'setvl 0, 0, 4, 0, 1, 1',
1164 'setvl 0, 0, 4, 0, 1, 1',
1170 'setvl 0, 0, 2, 0, 1, 1',
1172 'setvl 0, 0, 2, 0, 1, 1',
1173 'setvl 0, 0, 2, 0, 1, 1',
1175 'setvl 0, 0, 2, 0, 1, 1',
1178 'setvl 0, 0, 2, 0, 1, 1',
1179 'setvl 0, 0, 2, 0, 1, 1',
1181 'setvl 0, 0, 2, 0, 1, 1',
1182 'setvl 0, 0, 2, 0, 1, 1',
1184 'setvl 0, 0, 2, 0, 1, 1',
1190 'setvl 0, 0, 3, 0, 1, 1',
1194 'setvl 0, 0, 3, 0, 1, 1',
1195 'setvl 0, 0, 3, 0, 1, 1',
1196 'sv.or *24, *3, *3',
1197 'setvl 0, 0, 2, 0, 1, 1',
1198 'setvl 0, 0, 2, 0, 1, 1',
1200 'setvl 0, 0, 2, 0, 1, 1',
1206 'setvl 0, 0, 3, 0, 1, 1',
1210 'setvl 0, 0, 3, 0, 1, 1',
1211 'setvl 0, 0, 3, 0, 1, 1',
1212 'sv.or *30, *3, *3',
1213 'setvl 0, 0, 3, 0, 1, 1',
1215 'setvl 0, 0, 3, 0, 1, 1',
1216 'sv.or *9, *24, *24',
1217 'setvl 0, 0, 3, 0, 1, 1',
1218 'sv.or *6, *30, *30',
1219 'setvl 0, 0, 3, 0, 1, 1',
1220 'sv.adde *3, *9, *6',
1221 'setvl 0, 0, 3, 0, 1, 1',
1222 'sv.or *39, *3, *3',
1223 'setvl 0, 0, 4, 0, 1, 1',
1224 'setvl 0, 0, 4, 0, 1, 1',
1225 'sv.or *3, *14, *14',
1226 'setvl 0, 0, 4, 0, 1, 1',
1232 'setvl 0, 0, 2, 0, 1, 1',
1234 'setvl 0, 0, 2, 0, 1, 1',
1235 'setvl 0, 0, 2, 0, 1, 1',
1237 'setvl 0, 0, 2, 0, 1, 1',
1240 'setvl 0, 0, 2, 0, 1, 1',
1241 'setvl 0, 0, 2, 0, 1, 1',
1243 'setvl 0, 0, 2, 0, 1, 1',
1244 'setvl 0, 0, 2, 0, 1, 1',
1246 'setvl 0, 0, 2, 0, 1, 1',
1252 'setvl 0, 0, 3, 0, 1, 1',
1256 'setvl 0, 0, 3, 0, 1, 1',
1257 'setvl 0, 0, 3, 0, 1, 1',
1258 'sv.or *14, *3, *3',
1259 'setvl 0, 0, 2, 0, 1, 1',
1260 'setvl 0, 0, 2, 0, 1, 1',
1262 'setvl 0, 0, 2, 0, 1, 1',
1268 'setvl 0, 0, 3, 0, 1, 1',
1272 'setvl 0, 0, 3, 0, 1, 1',
1273 'setvl 0, 0, 3, 0, 1, 1',
1274 'sv.or *33, *3, *3',
1275 'setvl 0, 0, 3, 0, 1, 1',
1277 'setvl 0, 0, 3, 0, 1, 1',
1278 'sv.or *9, *14, *14',
1279 'setvl 0, 0, 3, 0, 1, 1',
1280 'sv.or *6, *33, *33',
1281 'setvl 0, 0, 3, 0, 1, 1',
1282 'sv.adde *3, *9, *6',
1283 'setvl 0, 0, 3, 0, 1, 1',
1284 'sv.or *36, *3, *3',
1285 'setvl 0, 0, 3, 0, 1, 1',
1286 'setvl 0, 0, 3, 0, 1, 1',
1287 'sv.or *3, *14, *14',
1288 'setvl 0, 0, 3, 0, 1, 1',
1289 'sv.or/mrr *5, *3, *3',
1295 'setvl 0, 0, 3, 0, 1, 1',
1297 'setvl 0, 0, 3, 0, 1, 1',
1298 'sv.or *8, *24, *24',
1301 'setvl 0, 0, 3, 0, 1, 1',
1302 'sv.maddedu *3, *8, 7, 6',
1303 'setvl 0, 0, 3, 0, 1, 1',
1305 'setvl 0, 0, 3, 0, 1, 1',
1306 'setvl 0, 0, 3, 0, 1, 1',
1310 'setvl 0, 0, 3, 0, 1, 1',
1311 'sv.or *8, *24, *24',
1314 'setvl 0, 0, 3, 0, 1, 1',
1315 'sv.maddedu *3, *8, 7, 6',
1316 'setvl 0, 0, 3, 0, 1, 1',
1318 'setvl 0, 0, 3, 0, 1, 1',
1319 'setvl 0, 0, 3, 0, 1, 1',
1327 'setvl 0, 0, 5, 0, 1, 1',
1333 'setvl 0, 0, 5, 0, 1, 1',
1334 'setvl 0, 0, 5, 0, 1, 1',
1341 'setvl 0, 0, 5, 0, 1, 1',
1342 'setvl 0, 0, 5, 0, 1, 1',
1344 'setvl 0, 0, 5, 0, 1, 1',
1345 'sv.or *14, *8, *8',
1346 'setvl 0, 0, 5, 0, 1, 1',
1348 'setvl 0, 0, 5, 0, 1, 1',
1349 'sv.adde *3, *14, *8',
1350 'setvl 0, 0, 5, 0, 1, 1',
1351 'setvl 0, 0, 5, 0, 1, 1',
1352 'setvl 0, 0, 5, 0, 1, 1',
1358 'setvl 0, 0, 3, 0, 1, 1',
1359 'sv.or *8, *24, *24',
1362 'setvl 0, 0, 3, 0, 1, 1',
1363 'sv.maddedu *3, *8, 7, 6',
1364 'setvl 0, 0, 3, 0, 1, 1',
1366 'setvl 0, 0, 3, 0, 1, 1',
1367 'setvl 0, 0, 3, 0, 1, 1',
1371 'setvl 0, 0, 4, 0, 1, 1',
1376 'setvl 0, 0, 4, 0, 1, 1',
1377 'setvl 0, 0, 4, 0, 1, 1',
1383 'setvl 0, 0, 4, 0, 1, 1',
1384 'setvl 0, 0, 4, 0, 1, 1',
1386 'setvl 0, 0, 4, 0, 1, 1',
1387 'sv.or *14, *7, *7',
1388 'setvl 0, 0, 4, 0, 1, 1',
1390 'setvl 0, 0, 4, 0, 1, 1',
1391 'sv.adde *3, *14, *7',
1392 'setvl 0, 0, 4, 0, 1, 1',
1393 'setvl 0, 0, 4, 0, 1, 1',
1394 'setvl 0, 0, 4, 0, 1, 1',
1399 'setvl 0, 0, 6, 0, 1, 1',
1406 'setvl 0, 0, 6, 0, 1, 1',
1407 'setvl 0, 0, 6, 0, 1, 1',
1408 'sv.or *24, *3, *3',
1409 'setvl 0, 0, 3, 0, 1, 1',
1410 'setvl 0, 0, 3, 0, 1, 1',
1411 'sv.or *3, *36, *36',
1412 'setvl 0, 0, 3, 0, 1, 1',
1413 'sv.or/mrr *5, *3, *3',
1419 'setvl 0, 0, 3, 0, 1, 1',
1421 'setvl 0, 0, 3, 0, 1, 1',
1422 'sv.or *8, *39, *39',
1425 'setvl 0, 0, 3, 0, 1, 1',
1426 'sv.maddedu *3, *8, 7, 6',
1427 'setvl 0, 0, 3, 0, 1, 1',
1429 'setvl 0, 0, 3, 0, 1, 1',
1430 'setvl 0, 0, 3, 0, 1, 1',
1434 'setvl 0, 0, 3, 0, 1, 1',
1435 'sv.or *8, *39, *39',
1438 'setvl 0, 0, 3, 0, 1, 1',
1439 'sv.maddedu *3, *8, 7, 6',
1440 'setvl 0, 0, 3, 0, 1, 1',
1442 'setvl 0, 0, 3, 0, 1, 1',
1443 'setvl 0, 0, 3, 0, 1, 1',
1451 'setvl 0, 0, 5, 0, 1, 1',
1457 'setvl 0, 0, 5, 0, 1, 1',
1458 'setvl 0, 0, 5, 0, 1, 1',
1465 'setvl 0, 0, 5, 0, 1, 1',
1466 'setvl 0, 0, 5, 0, 1, 1',
1468 'setvl 0, 0, 5, 0, 1, 1',
1469 'sv.or *14, *8, *8',
1470 'setvl 0, 0, 5, 0, 1, 1',
1472 'setvl 0, 0, 5, 0, 1, 1',
1473 'sv.adde *3, *14, *8',
1474 'setvl 0, 0, 5, 0, 1, 1',
1475 'setvl 0, 0, 5, 0, 1, 1',
1476 'setvl 0, 0, 5, 0, 1, 1',
1482 'setvl 0, 0, 3, 0, 1, 1',
1483 'sv.or *8, *39, *39',
1486 'setvl 0, 0, 3, 0, 1, 1',
1487 'sv.maddedu *3, *8, 7, 6',
1488 'setvl 0, 0, 3, 0, 1, 1',
1490 'setvl 0, 0, 3, 0, 1, 1',
1491 'setvl 0, 0, 3, 0, 1, 1',
1495 'setvl 0, 0, 4, 0, 1, 1',
1500 'setvl 0, 0, 4, 0, 1, 1',
1501 'setvl 0, 0, 4, 0, 1, 1',
1507 'setvl 0, 0, 4, 0, 1, 1',
1508 'setvl 0, 0, 4, 0, 1, 1',
1510 'setvl 0, 0, 4, 0, 1, 1',
1511 'sv.or *14, *7, *7',
1512 'setvl 0, 0, 4, 0, 1, 1',
1514 'setvl 0, 0, 4, 0, 1, 1',
1515 'sv.adde *3, *14, *7',
1516 'setvl 0, 0, 4, 0, 1, 1',
1517 'setvl 0, 0, 4, 0, 1, 1',
1518 'setvl 0, 0, 4, 0, 1, 1',
1523 'setvl 0, 0, 6, 0, 1, 1',
1530 'setvl 0, 0, 6, 0, 1, 1',
1531 'setvl 0, 0, 6, 0, 1, 1',
1532 'sv.or *36, *3, *3',
1533 'setvl 0, 0, 3, 0, 1, 1',
1534 'setvl 0, 0, 3, 0, 1, 1',
1535 'sv.or *3, *33, *33',
1536 'setvl 0, 0, 3, 0, 1, 1',
1537 'sv.or/mrr *5, *3, *3',
1543 'setvl 0, 0, 3, 0, 1, 1',
1545 'setvl 0, 0, 3, 0, 1, 1',
1546 'sv.or *8, *30, *30',
1549 'setvl 0, 0, 3, 0, 1, 1',
1550 'sv.maddedu *3, *8, 7, 6',
1551 'setvl 0, 0, 3, 0, 1, 1',
1553 'setvl 0, 0, 3, 0, 1, 1',
1554 'setvl 0, 0, 3, 0, 1, 1',
1558 'setvl 0, 0, 3, 0, 1, 1',
1559 'sv.or *8, *30, *30',
1562 'setvl 0, 0, 3, 0, 1, 1',
1563 'sv.maddedu *3, *8, 7, 6',
1564 'setvl 0, 0, 3, 0, 1, 1',
1566 'setvl 0, 0, 3, 0, 1, 1',
1567 'setvl 0, 0, 3, 0, 1, 1',
1575 'setvl 0, 0, 5, 0, 1, 1',
1581 'setvl 0, 0, 5, 0, 1, 1',
1582 'setvl 0, 0, 5, 0, 1, 1',
1589 'setvl 0, 0, 5, 0, 1, 1',
1590 'setvl 0, 0, 5, 0, 1, 1',
1592 'setvl 0, 0, 5, 0, 1, 1',
1593 'sv.or *14, *8, *8',
1594 'setvl 0, 0, 5, 0, 1, 1',
1596 'setvl 0, 0, 5, 0, 1, 1',
1597 'sv.adde *3, *14, *8',
1598 'setvl 0, 0, 5, 0, 1, 1',
1599 'setvl 0, 0, 5, 0, 1, 1',
1600 'setvl 0, 0, 5, 0, 1, 1',
1606 'setvl 0, 0, 3, 0, 1, 1',
1607 'sv.or *8, *30, *30',
1610 'setvl 0, 0, 3, 0, 1, 1',
1611 'sv.maddedu *3, *8, 7, 6',
1612 'setvl 0, 0, 3, 0, 1, 1',
1614 'setvl 0, 0, 3, 0, 1, 1',
1615 'setvl 0, 0, 3, 0, 1, 1',
1619 'setvl 0, 0, 4, 0, 1, 1',
1624 'setvl 0, 0, 4, 0, 1, 1',
1625 'setvl 0, 0, 4, 0, 1, 1',
1631 'setvl 0, 0, 4, 0, 1, 1',
1632 'setvl 0, 0, 4, 0, 1, 1',
1634 'setvl 0, 0, 4, 0, 1, 1',
1635 'sv.or *14, *7, *7',
1636 'setvl 0, 0, 4, 0, 1, 1',
1638 'setvl 0, 0, 4, 0, 1, 1',
1639 'sv.adde *3, *14, *7',
1640 'setvl 0, 0, 4, 0, 1, 1',
1641 'setvl 0, 0, 4, 0, 1, 1',
1642 'setvl 0, 0, 4, 0, 1, 1',
1647 'setvl 0, 0, 6, 0, 1, 1',
1654 'setvl 0, 0, 6, 0, 1, 1',
1655 'setvl 0, 0, 6, 0, 1, 1',
1656 'sv.or *30, *3, *3',
1657 'setvl 0, 0, 6, 0, 1, 1',
1658 'setvl 0, 0, 6, 0, 1, 1',
1659 'sv.or *3, *24, *24',
1660 'setvl 0, 0, 6, 0, 1, 1',
1661 'sv.or *14, *3, *3',
1668 'setvl 0, 0, 5, 0, 1, 1',
1674 'setvl 0, 0, 5, 0, 1, 1',
1675 'setvl 0, 0, 5, 0, 1, 1',
1676 'sv.or *25, *3, *3',
1677 'setvl 0, 0, 6, 0, 1, 1',
1678 'setvl 0, 0, 6, 0, 1, 1',
1679 'sv.or *3, *36, *36',
1680 'setvl 0, 0, 6, 0, 1, 1',
1681 'sv.or *14, *3, *3',
1688 'setvl 0, 0, 5, 0, 1, 1',
1694 'setvl 0, 0, 5, 0, 1, 1',
1695 'setvl 0, 0, 5, 0, 1, 1',
1696 'setvl 0, 0, 5, 0, 1, 1',
1698 'setvl 0, 0, 5, 0, 1, 1',
1699 'sv.or *14, *25, *25',
1700 'setvl 0, 0, 5, 0, 1, 1',
1702 'setvl 0, 0, 5, 0, 1, 1',
1703 'sv.subfe *3, *14, *8',
1704 'setvl 0, 0, 5, 0, 1, 1',
1705 'sv.or *20, *3, *3',
1706 'setvl 0, 0, 6, 0, 1, 1',
1707 'setvl 0, 0, 6, 0, 1, 1',
1708 'sv.or *3, *30, *30',
1709 'setvl 0, 0, 6, 0, 1, 1',
1710 'sv.or *14, *3, *3',
1717 'setvl 0, 0, 5, 0, 1, 1',
1723 'setvl 0, 0, 5, 0, 1, 1',
1724 'setvl 0, 0, 5, 0, 1, 1',
1725 'sv.or *30, *3, *3',
1726 'setvl 0, 0, 5, 0, 1, 1',
1728 'setvl 0, 0, 5, 0, 1, 1',
1729 'sv.or *14, *30, *30',
1730 'setvl 0, 0, 5, 0, 1, 1',
1731 'sv.or *8, *20, *20',
1732 'setvl 0, 0, 5, 0, 1, 1',
1733 'sv.subfe *3, *14, *8',
1734 'setvl 0, 0, 5, 0, 1, 1',
1735 'sv.or *16, *3, *3',
1736 'setvl 0, 0, 5, 0, 1, 1',
1737 'setvl 0, 0, 5, 0, 1, 1',
1738 'sv.or *3, *25, *25',
1739 'setvl 0, 0, 5, 0, 1, 1',
1745 'setvl 0, 0, 5, 0, 1, 1',
1746 'setvl 0, 0, 5, 0, 1, 1',
1747 'sv.or *3, *16, *16',
1748 'setvl 0, 0, 5, 0, 1, 1',
1754 'setvl 0, 0, 5, 0, 1, 1',
1755 'setvl 0, 0, 5, 0, 1, 1',
1756 'sv.or *3, *30, *30',
1757 'setvl 0, 0, 5, 0, 1, 1',
1769 'setvl 0, 0, 6, 0, 1, 1',
1776 'setvl 0, 0, 6, 0, 1, 1',
1777 'setvl 0, 0, 6, 0, 1, 1',
1778 'sv.or *14, *3, *3',
1785 'setvl 0, 0, 6, 0, 1, 1',
1786 'setvl 0, 0, 6, 0, 1, 1',
1788 'setvl 0, 0, 6, 0, 1, 1',
1789 'sv.or *20, *14, *14',
1790 'setvl 0, 0, 6, 0, 1, 1',
1791 'sv.or *14, *3, *3',
1792 'setvl 0, 0, 6, 0, 1, 1',
1793 'sv.adde *3, *20, *14',
1794 'setvl 0, 0, 6, 0, 1, 1',
1795 'setvl 0, 0, 6, 0, 1, 1',
1796 'setvl 0, 0, 6, 0, 1, 1',
1797 'sv.or *20, *3, *3',
1804 'setvl 0, 0, 4, 0, 1, 1',
1808 'setvl 0, 0, 4, 0, 1, 1',
1809 'setvl 0, 0, 4, 0, 1, 1',
1815 'setvl 0, 0, 4, 0, 1, 1',
1816 'setvl 0, 0, 4, 0, 1, 1',
1818 'setvl 0, 0, 4, 0, 1, 1',
1819 'sv.or *14, *7, *7',
1820 'setvl 0, 0, 4, 0, 1, 1',
1822 'setvl 0, 0, 4, 0, 1, 1',
1823 'sv.adde *3, *14, *7',
1824 'setvl 0, 0, 4, 0, 1, 1',
1825 'setvl 0, 0, 4, 0, 1, 1',
1826 'setvl 0, 0, 4, 0, 1, 1',
1831 'setvl 0, 0, 8, 0, 1, 1',
1840 'setvl 0, 0, 8, 0, 1, 1',
1841 'setvl 0, 0, 8, 0, 1, 1',
1842 'setvl 0, 0, 8, 0, 1, 1',
1843 'setvl 0, 0, 8, 0, 1, 1',
1844 'sv.or/mrr *4, *3, *3',
1846 'setvl 0, 0, 8, 0, 1, 1',
1851 if __name__
== "__main__":