2 # SPDX-License-Identifier: LGPL-2.1-or-later
3 # See Notices.txt for copyright information
5 from src
.multiply
import PartitionPoints
, PartitionedAdder
, AddReduce
, \
6 Mul8_16_32_64
, OP_MUL_LOW
, OP_MUL_SIGNED_HIGH
, \
7 OP_MUL_SIGNED_UNSIGNED_HIGH
, OP_MUL_UNSIGNED_HIGH
8 from nmigen
import Signal
, Module
9 from nmigen
.back
.pysim
import Simulator
, Delay
, Tick
, Passive
10 from nmigen
.hdl
.ast
import Assign
, Value
11 from typing
import Any
, Generator
, List
, Union
, Optional
, Tuple
, Iterable
13 from hashlib
import sha256
18 def create_simulator(module
: Any
,
20 test_name
: str) -> Simulator
:
21 return Simulator(module
,
22 vcd_file
=open(test_name
+ ".vcd", "w"),
23 gtkw_file
=open(test_name
+ ".gtkw", "w"),
# Everything a simulator process in this file yields: simulator commands,
# assignments (``signal.eq(...)``) and values that are read back.
AsyncProcessCommand = Union[Delay, Tick, Passive, Assign, Value]
# Same set of commands, but ``None`` is accepted as well.
ProcessCommand = Optional[AsyncProcessCommand]
# Generator aliases used as return annotations of the process functions.
# The send type is ``Optional[int]``: the result of a ``yield`` expression is
# either an integer or ``None``.
AsyncProcessGenerator = Generator[AsyncProcessCommand, Optional[int], None]
ProcessGenerator = Generator[ProcessCommand, Optional[int], None]
33 class TestPartitionPoints(unittest
.TestCase
):
34 def test(self
) -> None:
38 partition_point_10
= Signal()
39 partition_points
= PartitionPoints({1: True,
41 10: partition_point_10
})
42 module
.d
.comb
+= mask
.eq(partition_points
.as_mask(width
))
43 with
create_simulator(module
,
44 [mask
, partition_point_10
],
45 "partition_points") as sim
:
46 def async_process() -> AsyncProcessGenerator
:
47 self
.assertEqual((yield partition_points
[1]), True)
48 self
.assertEqual((yield partition_points
[5]), False)
49 yield partition_point_10
.eq(0)
51 self
.assertEqual((yield mask
), 0xFFFD)
52 yield partition_point_10
.eq(1)
54 self
.assertEqual((yield mask
), 0xFBFD)
56 sim
.add_process(async_process
)
60 class TestPartitionedAdder(unittest
.TestCase
):
61 def test(self
) -> None:
63 partition_nibbles
= Signal()
64 partition_bytes
= Signal()
65 module
= PartitionedAdder(width
,
66 {0x4: partition_nibbles
,
67 0x8: partition_bytes | partition_nibbles
,
68 0xC: partition_nibbles
})
69 with
create_simulator(module
,
75 "partitioned_adder") as sim
:
76 def async_process() -> AsyncProcessGenerator
:
77 def test_add(msg_prefix
: str,
78 *mask_list
: Tuple
[int, ...]) -> Any
:
79 for a
, b
in [(0x0000, 0x0000),
90 for mask
in mask_list
:
91 y |
= mask
& ((a
& mask
) + (b
& mask
))
92 output
= (yield module
.output
)
93 msg
= f
"{msg_prefix}: 0x{a:X} + 0x{b:X}" + \
94 f
" => 0x{y:X} != 0x{output:X}"
95 self
.assertEqual(y
, output
, msg
)
96 yield partition_nibbles
.eq(0)
97 yield partition_bytes
.eq(0)
98 yield from test_add("16-bit", 0xFFFF)
99 yield partition_nibbles
.eq(0)
100 yield partition_bytes
.eq(1)
101 yield from test_add("8-bit", 0xFF00, 0x00FF)
102 yield partition_nibbles
.eq(1)
103 yield partition_bytes
.eq(0)
104 yield from test_add("4-bit", 0xF000, 0x0F00, 0x00F0, 0x000F)
106 sim
.add_process(async_process
)
110 class GenOrCheck(enum
.Enum
):
111 Generate
= enum
.auto()
115 class TestAddReduce(unittest
.TestCase
):
116 def calculate_input_values(self
,
119 extra_keys
: List
[int] = []
120 ) -> (List
[int], List
[str]):
122 input_values_str
= []
123 for i
in range(input_count
):
131 hash_input
= f
"{input_count} {i} {key} {extra_keys}"
132 hash = sha256(hash_input
.encode()).digest()
133 value
= int.from_bytes(hash, byteorder
="little")
135 input_values
.append(value
)
136 input_values_str
.append(f
"0x{value:04X}")
137 return input_values
, input_values_str
139 def subtest_value(self
,
140 inputs
: List
[Signal
],
142 mask_list
: List
[int],
143 gen_or_check
: GenOrCheck
,
144 values
: List
[int]) -> AsyncProcessGenerator
:
145 if gen_or_check
== GenOrCheck
.Generate
:
146 for i
, v
in zip(inputs
, values
):
150 for mask
in mask_list
:
155 output
= (yield module
.output
)
156 if gen_or_check
== GenOrCheck
.Check
:
157 self
.assertEqual(y
, output
, f
"0x{y:X} != 0x{output:X}")
160 def subtest_key(self
,
162 inputs
: List
[Signal
],
165 mask_list
: List
[int],
166 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
167 values
, values_str
= self
.calculate_input_values(input_count
, key
)
168 if gen_or_check
== GenOrCheck
.Check
:
169 with self
.subTest(inputs
=values_str
):
170 yield from self
.subtest_value(inputs
,
176 yield from self
.subtest_value(inputs
,
182 def subtest_run_sim(self
,
187 inputs
: List
[Signal
],
189 delay_cycles
: int) -> None:
190 def generic_process(gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
191 for partition_4_value
, partition_8_value
, mask_list
in [
193 (0, 1, [0xFF00, 0x00FF]),
194 (1, 0, [0xFFF0, 0x000F]),
195 (1, 1, [0xFF00, 0x00F0, 0x000F])]:
197 if gen_or_check
== GenOrCheck
.Check
:
198 with self
.subTest(partition_4
=partition_4_value
,
199 partition_8
=partition_8_value
):
200 for key
in range(key_count
):
201 with self
.subTest(key
=key
):
202 yield from self
.subtest_key(input_count
,
209 if gen_or_check
== GenOrCheck
.Generate
:
210 yield partition_4
.eq(partition_4_value
)
211 yield partition_8
.eq(partition_8_value
)
212 for key
in range(key_count
):
213 yield from self
.subtest_key(input_count
,
220 def generate_process() -> AsyncProcessGenerator
:
221 yield from generic_process(GenOrCheck
.Generate
)
223 def check_process() -> AsyncProcessGenerator
:
224 if delay_cycles
!= 0:
225 for _
in range(delay_cycles
):
227 yield from generic_process(GenOrCheck
.Check
)
230 sim
.add_process(generate_process
)
231 sim
.add_process(check_process
)
234 def subtest_file(self
,
236 register_levels
: List
[int]) -> None:
237 max_level
= AddReduce
.get_max_level(input_count
)
238 for level
in register_levels
:
239 if level
> max_level
:
241 partition_4
= Signal()
242 partition_8
= Signal()
243 partition_points
= PartitionPoints()
244 partition_points
[4] = partition_4
245 partition_points
[8] = partition_8
247 inputs
= [Signal(width
, name
=f
"input_{i}")
248 for i
in range(input_count
)]
249 module
= AddReduce(inputs
,
253 file_name
= "add_reduce"
254 if len(register_levels
) != 0:
255 file_name
+= f
"-{'_'.join(map(repr, register_levels))}"
256 file_name
+= f
"-{input_count:02d}"
257 with
create_simulator(module
,
263 self
.subtest_run_sim(input_count
,
269 len(register_levels
))
def subtest_register_levels(self, register_levels: List[int]) -> None:
    """Call ``subtest_file`` once for every input count from 0 through 15.

    Each call runs inside its own ``subTest`` labelled with the input count
    and the ``repr`` of ``register_levels``.
    """
    for count in range(16):
        labels = {
            "input_count": count,
            "register_levels": repr(register_levels),
        }
        with self.subTest(**labels):
            self.subtest_file(count, register_levels)
277 def test_empty(self
) -> None:
278 self
.subtest_register_levels([])
280 def test_0(self
) -> None:
281 self
.subtest_register_levels([0])
283 def test_1(self
) -> None:
284 self
.subtest_register_levels([1])
286 def test_2(self
) -> None:
287 self
.subtest_register_levels([2])
289 def test_3(self
) -> None:
290 self
.subtest_register_levels([3])
292 def test_4(self
) -> None:
293 self
.subtest_register_levels([4])
295 def test_5(self
) -> None:
296 self
.subtest_register_levels([5])
298 def test_0(self
) -> None:
299 self
.subtest_register_levels([0])
301 def test_0_1(self
) -> None:
302 self
.subtest_register_levels([0, 1])
304 def test_0_1_2(self
) -> None:
305 self
.subtest_register_levels([0, 1, 2])
307 def test_0_1_2_3(self
) -> None:
308 self
.subtest_register_levels([0, 1, 2, 3])
310 def test_0_1_2_3_4(self
) -> None:
311 self
.subtest_register_levels([0, 1, 2, 3, 4])
313 def test_0_1_2_3_4_5(self
) -> None:
314 self
.subtest_register_levels([0, 1, 2, 3, 4, 5])
316 def test_0_2(self
) -> None:
317 self
.subtest_register_levels([0, 2])
319 def test_0_3(self
) -> None:
320 self
.subtest_register_levels([0, 3])
322 def test_0_4(self
) -> None:
323 self
.subtest_register_levels([0, 4])
325 def test_0_5(self
) -> None:
326 self
.subtest_register_levels([0, 5])
335 self
.a_signed
= a_signed
336 self
.b_signed
= b_signed
337 self
.bit_width
= bit_width
338 self
.high_half
= high_half
341 return f
"SIMDMulLane({self.a_signed}, {self.b_signed}, " +\
342 f
"{self.bit_width}, {self.high_half})"
345 class TestMul8_16_32_64(unittest
.TestCase
):
347 def simd_mul(a
: int, b
: int, lanes
: List
[SIMDMulLane
]) -> Tuple
[int, int]:
349 intermediate_output
= 0
352 a_signed
= lane
.a_signed
or not lane
.high_half
353 b_signed
= lane
.b_signed
or not lane
.high_half
354 mask
= (1 << lane
.bit_width
) - 1
355 sign_bit
= 1 << (lane
.bit_width
- 1)
356 a_part
= (a
>> shift
) & mask
357 if a_signed
and (a_part
& sign_bit
) != 0:
358 a_part
-= 1 << lane
.bit_width
359 b_part
= (b
>> shift
) & mask
360 if b_signed
and (b_part
& sign_bit
) != 0:
361 b_part
-= 1 << lane
.bit_width
362 value
= a_part
* b_part
363 value
&= (1 << (lane
.bit_width
* 2)) - 1
364 intermediate_output |
= value
<< (shift
* 2)
366 value
>>= lane
.bit_width
368 output |
= value
<< shift
369 shift
+= lane
.bit_width
370 return output
, intermediate_output
373 def get_test_cases(lanes
: List
[SIMDMulLane
],
374 keys
: Iterable
[int]) -> Iterable
[Tuple
[int, int]]:
377 hash_input
= f
"{i} {lanes} {list(keys)}"
378 hash = sha256(hash_input
.encode()).digest()
379 value
= int.from_bytes(hash, byteorder
="little")
380 yield (value
& mask
, value
>> 64)
385 a |
= 1 << (shift
+ lane
.bit_width
- 1)
386 b |
= 1 << (shift
+ lane
.bit_width
- 1)
387 shift
+= lane
.bit_width
def test_simd_mul_lane(self):
    """Check the text produced by formatting a ``SIMDMulLane`` instance."""
    actual = f"{SIMDMulLane(True, True, 8, False)}"
    expected = "SIMDMulLane(True, True, 8, False)"
    self.assertEqual(actual, expected)
394 def test_simd_mul(self
):
395 lanes
= [SIMDMulLane(True,
411 a
= 0x0123456789ABCDEF
412 b
= 0xFEDCBA9876543210
413 output
= 0x0121FA00FE1C28FE
414 intermediate_output
= 0x0121FA0023E20B28C94DFE1C280AFEF0
415 self
.assertEqual(self
.simd_mul(a
, b
, lanes
),
416 (output
, intermediate_output
))
417 a
= 0x8123456789ABCDEF
418 b
= 0xFEDCBA9876543210
419 output
= 0x81B39CB4FE1C28FE
420 intermediate_output
= 0x81B39CB423E20B28C94DFE1C280AFEF0
421 self
.assertEqual(self
.simd_mul(a
, b
, lanes
),
422 (output
, intermediate_output
))
424 def test_signed_mul_from_unsigned(self
):
425 for i
in range(0, 0x10):
426 for j
in range(0, 0x10):
427 si
= i
if i
& 8 else i
- 0x10 # signed i
428 sj
= j
if j
& 8 else j
- 0x10 # signed j
432 with self
.subTest(i
=i
, j
=j
, si
=si
, sj
=sj
,
433 mulu
=mulu
, mulsu
=mulsu
, mul
=mul
):
438 self
.assertEqual(mulsu
& 0xFF, mulsu2
& 0xFF)
443 self
.assertEqual(mul
& 0xFF, mul2
& 0xFF)
445 def subtest_value(self
,
448 module
: Mul8_16_32_64
,
449 lanes
: List
[SIMDMulLane
],
450 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
451 if gen_or_check
== GenOrCheck
.Generate
:
454 output2
, intermediate_output2
= self
.simd_mul(a
, b
, lanes
)
456 if gen_or_check
== GenOrCheck
.Check
:
457 intermediate_output
= (yield module
._intermediate
_output
)
458 self
.assertEqual(intermediate_output
,
459 intermediate_output2
,
460 f
"0x{intermediate_output:X} "
461 + f
"!= 0x{intermediate_output2:X}")
462 output
= (yield module
.output
)
463 self
.assertEqual(output
, output2
, f
"0x{output:X} != 0x{output2:X}")
466 def subtest_lanes_2(self
,
467 lanes
: List
[SIMDMulLane
],
468 module
: Mul8_16_32_64
,
469 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
476 op
= OP_MUL_SIGNED_HIGH
478 op
= OP_MUL_SIGNED_UNSIGNED_HIGH
480 self
.assertFalse(lane
.b_signed
,
481 "unsigned * signed not supported")
482 op
= OP_MUL_UNSIGNED_HIGH
485 self
.assertEqual(lane
.bit_width
% 8, 0)
486 for i
in range(lane
.bit_width
// 8):
487 if gen_or_check
== GenOrCheck
.Generate
:
488 yield module
.part_ops
[part_index
].eq(op
)
490 for i
in range(lane
.bit_width
// 8 - 1):
491 if gen_or_check
== GenOrCheck
.Generate
:
492 yield module
.part_pts
[bit_index
].eq(0)
494 if bit_index
< 64 and gen_or_check
== GenOrCheck
.Generate
:
495 yield module
.part_pts
[bit_index
].eq(1)
497 self
.assertEqual(part_index
, 8)
498 for a
, b
in self
.get_test_cases(lanes
, ()):
499 if gen_or_check
== GenOrCheck
.Check
:
500 with self
.subTest(a
=f
"{a:X}", b
=f
"{b:X}"):
501 yield from self
.subtest_value(a
, b
, module
, lanes
, gen_or_check
)
503 yield from self
.subtest_value(a
, b
, module
, lanes
, gen_or_check
)
505 def subtest_lanes(self
,
506 lanes
: List
[SIMDMulLane
],
507 module
: Mul8_16_32_64
,
508 gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
509 if gen_or_check
== GenOrCheck
.Check
:
510 with self
.subTest(lanes
=repr(lanes
)):
511 yield from self
.subtest_lanes_2(lanes
, module
, gen_or_check
)
513 yield from self
.subtest_lanes_2(lanes
, module
, gen_or_check
)
515 def subtest_file(self
,
516 register_levels
: List
[int]) -> None:
517 module
= Mul8_16_32_64(register_levels
)
518 file_name
= "mul8_16_32_64"
519 if len(register_levels
) != 0:
520 file_name
+= f
"-{'_'.join(map(repr, register_levels))}"
523 module
._intermediate
_output
,
525 ports
.extend(module
.part_ops
)
526 ports
.extend(module
.part_pts
.values())
527 for signals
in module
._delayed
_part
_ops
:
528 ports
.extend(signals
)
529 ports
.extend(module
._part
_8)
530 ports
.extend(module
._part
_16)
531 ports
.extend(module
._part
_32)
532 ports
.extend(module
._part
_64)
533 for signals
in module
._delayed
_part
_8:
534 ports
.extend(signals
)
535 for signals
in module
._delayed
_part
_16:
536 ports
.extend(signals
)
537 for signals
in module
._delayed
_part
_32:
538 ports
.extend(signals
)
539 for signals
in module
._delayed
_part
_64:
540 ports
.extend(signals
)
541 ports
+= [module
._output
_64,
545 ports
.extend(module
._a
_signed
)
546 ports
.extend(module
._b
_signed
)
547 ports
+= [module
._not
_a
_term
_8,
548 module
._neg
_lsb
_a
_term
_8,
549 module
._not
_b
_term
_8,
550 module
._neg
_lsb
_b
_term
_8,
551 module
._not
_a
_term
_16,
552 module
._neg
_lsb
_a
_term
_16,
553 module
._not
_b
_term
_16,
554 module
._neg
_lsb
_b
_term
_16,
555 module
._not
_a
_term
_32,
556 module
._neg
_lsb
_a
_term
_32,
557 module
._not
_b
_term
_32,
558 module
._neg
_lsb
_b
_term
_32,
559 module
._not
_a
_term
_64,
560 module
._neg
_lsb
_a
_term
_64,
561 module
._not
_b
_term
_64,
562 module
._neg
_lsb
_b
_term
_64]
563 with
create_simulator(module
, ports
, file_name
) as sim
:
564 def process(gen_or_check
: GenOrCheck
) -> AsyncProcessGenerator
:
565 for a_signed
in False, True:
566 for b_signed
in False, True:
567 if not a_signed
and b_signed
:
569 for high_half
in False, True:
570 if not high_half
and not (a_signed
and b_signed
):
572 yield from self
.subtest_lanes(
573 [SIMDMulLane(a_signed
,
579 yield from self
.subtest_lanes(
580 [SIMDMulLane(a_signed
,
586 yield from self
.subtest_lanes(
587 [SIMDMulLane(a_signed
,
593 yield from self
.subtest_lanes(
594 [SIMDMulLane(a_signed
,
600 yield from self
.subtest_lanes([SIMDMulLane(False,
618 yield from self
.subtest_lanes([SIMDMulLane(True,
636 yield from self
.subtest_lanes([SIMDMulLane(True,
655 def generate_process() -> AsyncProcessGenerator
:
656 yield from process(GenOrCheck
.Generate
)
658 def check_process() -> AsyncProcessGenerator
:
659 if len(register_levels
) != 0:
660 for _
in register_levels
:
662 yield from process(GenOrCheck
.Check
)
665 sim
.add_process(generate_process
)
666 sim
.add_process(check_process
)
def subtest_register_levels(self, register_levels: List[int]) -> None:
    """Run ``subtest_file`` inside a ``subTest`` labelled with the levels.

    The label is the ``repr`` of ``register_levels``.
    """
    label = repr(register_levels)
    with self.subTest(register_levels=label):
        self.subtest_file(register_levels)
673 def test_empty(self
) -> None:
674 self
.subtest_register_levels([])
676 def test_0(self
) -> None:
677 self
.subtest_register_levels([0])
679 def test_1(self
) -> None:
680 self
.subtest_register_levels([1])
682 def test_2(self
) -> None:
683 self
.subtest_register_levels([2])
685 def test_3(self
) -> None:
686 self
.subtest_register_levels([3])
688 def test_4(self
) -> None:
689 self
.subtest_register_levels([4])
691 def test_5(self
) -> None:
692 self
.subtest_register_levels([5])
694 def test_6(self
) -> None:
695 self
.subtest_register_levels([6])
697 def test_7(self
) -> None:
698 self
.subtest_register_levels([7])
700 def test_8(self
) -> None:
701 self
.subtest_register_levels([8])
703 def test_9(self
) -> None:
704 self
.subtest_register_levels([9])
706 def test_10(self
) -> None:
707 self
.subtest_register_levels([10])
709 def test_0(self
) -> None:
710 self
.subtest_register_levels([0])
712 def test_0_1(self
) -> None:
713 self
.subtest_register_levels([0, 1])
715 def test_0_1_2(self
) -> None:
716 self
.subtest_register_levels([0, 1, 2])
718 def test_0_1_2_3(self
) -> None:
719 self
.subtest_register_levels([0, 1, 2, 3])
721 def test_0_1_2_3_4(self
) -> None:
722 self
.subtest_register_levels([0, 1, 2, 3, 4])
724 def test_0_1_2_3_4_5(self
) -> None:
725 self
.subtest_register_levels([0, 1, 2, 3, 4, 5])
727 def test_0_1_2_3_4_5_6(self
) -> None:
728 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6])
730 def test_0_1_2_3_4_5_6_7(self
) -> None:
731 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7])
733 def test_0_1_2_3_4_5_6_7_8(self
) -> None:
734 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7, 8])
736 def test_0_1_2_3_4_5_6_7_8_9(self
) -> None:
737 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
739 def test_0_1_2_3_4_5_6_7_8_9_10(self
) -> None:
740 self
.subtest_register_levels([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
742 def test_0_2(self
) -> None:
743 self
.subtest_register_levels([0, 2])
745 def test_0_3(self
) -> None:
746 self
.subtest_register_levels([0, 3])
748 def test_0_4(self
) -> None:
749 self
.subtest_register_levels([0, 4])
751 def test_0_5(self
) -> None:
752 self
.subtest_register_levels([0, 5])
754 def test_0_6(self
) -> None:
755 self
.subtest_register_levels([0, 6])
757 def test_0_7(self
) -> None:
758 self
.subtest_register_levels([0, 7])
760 def test_0_8(self
) -> None:
761 self
.subtest_register_levels([0, 8])
763 def test_0_9(self
) -> None:
764 self
.subtest_register_levels([0, 9])
766 def test_0_10(self
) -> None:
767 self
.subtest_register_levels([0, 10])
769 if __name__
== '__main__':