1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3 """Integer Multiplication."""
5 from nmigen
import Signal
, Module
, Value
, Elaboratable
, Cat
, C
, Mux
, Repl
6 from nmigen
.hdl
.ast
import Assign
7 from abc
import ABCMeta
, abstractmethod
8 from typing
import Any
, NewType
, Union
, List
, Dict
, Iterable
, Mapping
, Optional
9 from typing_extensions
import final
10 from nmigen
.cli
import main
# Accepted input form for partition points: maps a bit index to the flag that
# enables that point. Flags may be a ``Value`` or a plain ``bool``/``int``;
# ``PartitionPoints`` passes each one through ``Value.wrap``.
PartitionPointsIn = Mapping[int, Union[Value, bool, int]]
15 class PartitionPoints(Dict
[int, Value
]):
16 """Partition points and corresponding ``Value``s.

The points at which an ALU is partitioned along with ``Value``s that
specify if the corresponding partition points are enabled.

For example: ``{1: True, 5: True, 10: True}`` with
``width == 16`` specifies that the ALU is split into 4 sections:
* bits 0 <= ``i`` < 1
* bits 1 <= ``i`` < 5
* bits 5 <= ``i`` < 10
* bits 10 <= ``i`` < 16

If the partition_points were instead ``{1: True, 5: a, 10: True}``
where ``a`` is a 1-bit ``Signal``:
* If ``a`` is asserted:
    * bits 0 <= ``i`` < 1
    * bits 1 <= ``i`` < 5
    * bits 5 <= ``i`` < 10
    * bits 10 <= ``i`` < 16
* Otherwise:
    * bits 0 <= ``i`` < 1
    * bits 1 <= ``i`` < 10
    * bits 10 <= ``i`` < 16
"""
def __init__(self, partition_points: Optional[PartitionPointsIn] = None):
    """Create a new ``PartitionPoints``.

    :param partition_points: the input partition points to values mapping.
        ``None`` (the default) creates an empty mapping.
    :raises TypeError: if a point is not an ``int``.
    :raises ValueError: if a point is negative.
    """
    super().__init__()
    if partition_points is None:
        return
    for point, enabled in partition_points.items():
        if not isinstance(point, int):
            raise TypeError("point must be a non-negative integer")
        # Only negative indices are invalid; raising unconditionally here
        # would reject every entry.
        if point < 0:
            raise ValueError("point must be a non-negative integer")
        # Store every flag as a ``Value`` so plain bool/int inputs can be
        # used the same way as signals.
        self[point] = Value.wrap(enabled)
56 name
: Optional
[str] = None,
57 src_loc_at
: int = 0) -> 'PartitionPoints':
58 """Create a new ``PartitionPoints`` with ``Signal``s for all values.
60 :param name: the base name for the new ``Signal``s.
63 name
= Signal(src_loc_at
=1+src_loc_at
).name
# get variable name
64 retval
= PartitionPoints()
65 for point
, enabled
in self
.items():
66 retval
[point
] = Signal(enabled
.shape(), name
=f
"{name}_{point}")
def eq(self, rhs: 'PartitionPoints') -> 'Iterable[Assign]':
    """Assign ``PartitionPoints`` using ``Signal.eq``.

    :param rhs: the partition points to assign from; it must contain
        exactly the same set of points as ``self``.
    :returns: one assignment per point, in the iteration order of ``self``.
    :raises ValueError: if the two point sets differ.
    """
    # Validate at call time. As a generator, this check would be deferred
    # until the result was first iterated, so a mismatch could surface far
    # from the call that caused it (or never, if the result is dropped).
    if set(self.keys()) != set(rhs.keys()):
        raise ValueError("incompatible point set")
    return [enabled.eq(rhs[point]) for point, enabled in self.items()]
76 def as_mask(self
, width
: int) -> Value
:
77 """Create a bit-mask from `self`.
79 Each bit in the returned mask is clear only if the partition point at
80 the same bit-index is enabled.
82 :param width: the bit width of the resulting mask
84 bits
: List
[Union
[Value
, bool]]
86 for i
in range(width
):
93 def get_max_partition_count(self
, width
: int) -> int:
94 """Get the maximum number of partitions.
96 Gets the number of partitions when all partition points are enabled.
99 for point
in self
.keys():
104 def fits_in_width(self
, width
: int) -> bool:
105 """Check if all partition points are smaller than `width`."""
106 for point
in self
.keys():
class FullAdder(Elaboratable):
    """Full adder.

    Operates bitwise: every bit position is an independent 3-input full
    adder, and ``carry`` is reported at the same bit position as the inputs
    that produced it (it is not shifted or propagated here).

    :attribute in0: the first input
    :attribute in1: the second input
    :attribute in2: the third input
    :attribute sum: the sum output
    :attribute carry: the carry output
    """

    def __init__(self, width: int):
        """Create a ``FullAdder``.

        :param width: the bit width of the input and output
        """
        self.in0 = Signal(width)
        self.in1 = Signal(width)
        self.in2 = Signal(width)
        self.sum = Signal(width)
        self.carry = Signal(width)

    def elaborate(self, platform: Any) -> Module:
        """Elaborate this module.

        :param platform: unused by this module.
        :returns: the module containing the combinatorial adder logic.
        """
        # The module must be created before statements are added to it.
        m = Module()
        # Sum bit: XOR of the three inputs.
        m.d.comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
        # Carry bit: set wherever at least two of the three inputs are set.
        m.d.comb += self.carry.eq((self.in0 & self.in1)
                                  | (self.in1 & self.in2)
                                  | (self.in2 & self.in0))
        return m
145 class PartitionedAdder(Elaboratable
):
146 """Partitioned Adder.
148 :attribute width: the bit width of the input and output. Read-only.
149 :attribute a: the first input to the adder
150 :attribute b: the second input to the adder
151 :attribute output: the sum output
152 :attribute partition_points: the input partition points. Modification not
153 supported, except for by ``Signal.eq``.
156 def __init__(self
, width
: int, partition_points
: PartitionPointsIn
):
157 """Create a ``PartitionedAdder``.
159 :param width: the bit width of the input and output
160 :param partition_points: the input partition points
163 self
.a
= Signal(width
)
164 self
.b
= Signal(width
)
165 self
.output
= Signal(width
)
166 self
.partition_points
= PartitionPoints(partition_points
)
167 if not self
.partition_points
.fits_in_width(width
):
168 raise ValueError("partition_points doesn't fit in width")
170 for i
in range(self
.width
):
171 if i
in self
.partition_points
:
174 self
._expanded
_width
= expanded_width
175 self
._expanded
_a
= Signal(expanded_width
)
176 self
._expanded
_b
= Signal(expanded_width
)
177 self
._expanded
_output
= Signal(expanded_width
)
179 def elaborate(self
, platform
: Any
) -> Module
:
180 """Elaborate this module."""
183 for i
in range(self
.width
):
184 if i
in self
.partition_points
:
185 # add extra bit set to 0 + 0 for enabled partition points
186 # and 1 + 0 for disabled partition points
187 m
.d
.comb
+= self
._expanded
_a
[expanded_index
].eq(
188 ~self
.partition_points
[i
])
189 m
.d
.comb
+= self
._expanded
_b
[expanded_index
].eq(0)
191 m
.d
.comb
+= self
._expanded
_a
[expanded_index
].eq(self
.a
[i
])
192 m
.d
.comb
+= self
._expanded
_b
[expanded_index
].eq(self
.b
[i
])
193 m
.d
.comb
+= self
.output
[i
].eq(
194 self
._expanded
_output
[expanded_index
])
196 # use only one addition to take advantage of look-ahead carry and
197 # special hardware on FPGAs
198 m
.d
.comb
+= self
._expanded
_output
.eq(
199 self
._expanded
_a
+ self
._expanded
_b
)
# Number of inputs each ``FullAdder`` consumes (``in0``, ``in1``, ``in2``);
# ``AddReduce`` groups its inputs in runs of this size.
FULL_ADDER_INPUT_COUNT = 3
207 class AddReduce(Elaboratable
):
208 """Add list of numbers together.
210 :attribute inputs: input ``Signal``s to be summed. Modification not
211 supported, except for by ``Signal.eq``.
:attribute register_levels: List of nesting levels that should have
    pipeline registers.
214 :attribute output: output sum.
215 :attribute partition_points: the input partition points. Modification not
216 supported, except for by ``Signal.eq``.
220 inputs
: Iterable
[Signal
],
222 register_levels
: Iterable
[int],
223 partition_points
: PartitionPointsIn
):
224 """Create an ``AddReduce``.
226 :param inputs: input ``Signal``s to be summed.
227 :param output_width: bit-width of ``output``.
:param register_levels: List of nesting levels that should have
    pipeline registers.
230 :param partition_points: the input partition points.
232 self
.inputs
= list(inputs
)
233 self
._resized
_inputs
= [
234 Signal(output_width
, name
=f
"resized_inputs[{i}]")
235 for i
in range(len(self
.inputs
))]
236 self
.register_levels
= list(register_levels
)
237 self
.output
= Signal(output_width
)
238 self
.partition_points
= PartitionPoints(partition_points
)
239 if not self
.partition_points
.fits_in_width(output_width
):
240 raise ValueError("partition_points doesn't fit in output_width")
241 self
._reg
_partition
_points
= self
.partition_points
.like()
242 max_level
= AddReduce
.get_max_level(len(self
.inputs
))
243 for level
in self
.register_levels
:
244 if level
> max_level
:
246 "not enough adder levels for specified register levels")
249 def get_max_level(input_count
: int) -> int:
250 """Get the maximum level.
All ``register_levels`` must be less than or equal to the maximum
level.
257 groups
= AddReduce
.full_adder_groups(input_count
)
260 input_count
%= FULL_ADDER_INPUT_COUNT
261 input_count
+= 2 * len(groups
)
264 def next_register_levels(self
) -> Iterable
[int]:
265 """``Iterable`` of ``register_levels`` for next recursive level."""
266 for level
in self
.register_levels
:
271 def full_adder_groups(input_count
: int) -> range:
272 """Get ``inputs`` indices for which a full adder should be built."""
274 input_count
- FULL_ADDER_INPUT_COUNT
+ 1,
275 FULL_ADDER_INPUT_COUNT
)
277 def elaborate(self
, platform
: Any
) -> Module
:
278 """Elaborate this module."""
# resize inputs to correct bit-width and optionally add in
# pipeline registers
283 resized_input_assignments
= [self
._resized
_inputs
[i
].eq(self
.inputs
[i
])
284 for i
in range(len(self
.inputs
))]
285 if 0 in self
.register_levels
:
286 m
.d
.sync
+= resized_input_assignments
287 m
.d
.sync
+= self
._reg
_partition
_points
.eq(self
.partition_points
)
289 m
.d
.comb
+= resized_input_assignments
290 m
.d
.comb
+= self
._reg
_partition
_points
.eq(self
.partition_points
)
292 groups
= AddReduce
.full_adder_groups(len(self
.inputs
))
293 # if there are no full adders to create, then we handle the base cases
294 # and return, otherwise we go on to the recursive case
296 if len(self
.inputs
) == 0:
297 # use 0 as the default output value
298 m
.d
.comb
+= self
.output
.eq(0)
299 elif len(self
.inputs
) == 1:
300 # handle single input
301 m
.d
.comb
+= self
.output
.eq(self
._resized
_inputs
[0])
303 # base case for adding 2 or more inputs, which get recursively
304 # reduced to 2 inputs
305 assert len(self
.inputs
) == 2
306 adder
= PartitionedAdder(len(self
.output
),
307 self
._reg
_partition
_points
)
308 m
.submodules
.final_adder
= adder
309 m
.d
.comb
+= adder
.a
.eq(self
._resized
_inputs
[0])
310 m
.d
.comb
+= adder
.b
.eq(self
._resized
_inputs
[1])
311 m
.d
.comb
+= self
.output
.eq(adder
.output
)
313 # go on to handle recursive case
314 intermediate_terms
: List
[Signal
]
315 intermediate_terms
= []
317 def add_intermediate_term(value
: Value
) -> None:
318 intermediate_term
= Signal(
320 name
=f
"intermediate_terms[{len(intermediate_terms)}]")
321 intermediate_terms
.append(intermediate_term
)
322 m
.d
.comb
+= intermediate_term
.eq(value
)
324 part_mask
= self
._reg
_partition
_points
.as_mask(len(self
.output
))
326 # create full adders for this recursive level.
327 # this shrinks N terms to 2 * (N // 3) plus the remainder
329 adder_i
= FullAdder(len(self
.output
))
330 setattr(m
.submodules
, f
"adder_{i}", adder_i
)
331 m
.d
.comb
+= adder_i
.in0
.eq(self
._resized
_inputs
[i
])
332 m
.d
.comb
+= adder_i
.in1
.eq(self
._resized
_inputs
[i
+ 1])
333 m
.d
.comb
+= adder_i
.in2
.eq(self
._resized
_inputs
[i
+ 2])
334 add_intermediate_term(adder_i
.sum)
335 shifted_carry
= adder_i
.carry
<< 1
336 # mask out carry bits to prevent carries between partitions
337 add_intermediate_term((adder_i
.carry
<< 1) & part_mask
)
338 # handle the remaining inputs.
339 if len(self
.inputs
) % FULL_ADDER_INPUT_COUNT
== 1:
340 add_intermediate_term(self
._resized
_inputs
[-1])
341 elif len(self
.inputs
) % FULL_ADDER_INPUT_COUNT
== 2:
342 # Just pass the terms to the next layer, since we wouldn't gain
343 # anything by using a half adder since there would still be 2 terms
344 # and just passing the terms to the next layer saves gates.
345 add_intermediate_term(self
._resized
_inputs
[-2])
346 add_intermediate_term(self
._resized
_inputs
[-1])
348 assert len(self
.inputs
) % FULL_ADDER_INPUT_COUNT
== 0
349 # recursive invocation of ``AddReduce``
350 next_level
= AddReduce(intermediate_terms
,
352 self
.next_register_levels(),
353 self
._reg
_partition
_points
)
354 m
.submodules
.next_level
= next_level
355 m
.d
.comb
+= self
.output
.eq(next_level
.output
)
# Operation codes for ``Mul8_16_32_64.part_ops``. The four codes cover every
# value of the 2-bit ``part_ops`` signals.
OP_MUL_LOW = 0
OP_MUL_SIGNED_HIGH = 1
OP_MUL_SIGNED_UNSIGNED_HIGH = 2  # a is signed, b is unsigned
OP_MUL_UNSIGNED_HIGH = 3
365 class Mul8_16_32_64(Elaboratable
):
366 """Signed/Unsigned 8/16/32/64-bit partitioned integer multiplier.
368 Supports partitioning into any combination of 8, 16, 32, and 64-bit
369 partitions on naturally-aligned boundaries. Supports the operation being
370 set for each partition independently.
372 :attribute part_pts: the input partition points. Has a partition point at
373 multiples of 8 in 0 < i < 64. Each partition point's associated
``Value`` is a ``Signal``. Modification not supported, except for by
``Signal.eq``.
376 :attribute part_ops: the operation for each byte. The operation for a
377 particular partition is selected by assigning the selected operation
378 code to each byte in the partition. The allowed operation codes are:
380 :attribute OP_MUL_LOW: the LSB half of the product. Equivalent to
381 RISC-V's `mul` instruction.
382 :attribute OP_MUL_SIGNED_HIGH: the MSB half of the product where both
``a`` and ``b`` are signed. Equivalent to RISC-V's `mulh`
instruction.
385 :attribute OP_MUL_SIGNED_UNSIGNED_HIGH: the MSB half of the product
386 where ``a`` is signed and ``b`` is unsigned. Equivalent to RISC-V's
387 `mulhsu` instruction.
388 :attribute OP_MUL_UNSIGNED_HIGH: the MSB half of the product where both
``a`` and ``b`` are unsigned. Equivalent to RISC-V's `mulhu`
instruction.
393 def __init__(self
, register_levels
: Iterable
[int] = ()):
394 self
.part_pts
= PartitionPoints()
395 for i
in range(8, 64, 8):
396 self
.part_pts
[i
] = Signal(name
=f
"part_pts_{i}")
397 self
.part_ops
= [Signal(2, name
=f
"part_ops_{i}") for i
in range(8)]
400 self
.output
= Signal(64)
401 self
.register_levels
= list(register_levels
)
402 self
._intermediate
_output
= Signal(128)
403 self
._delayed
_part
_ops
= [
404 [Signal(2, name
=f
"_delayed_part_ops_{delay}_{i}")
406 for delay
in range(1 + len(self
.register_levels
))]
407 self
._part
_8 = [Signal(name
=f
"_part_8_{i}") for i
in range(8)]
408 self
._part
_16 = [Signal(name
=f
"_part_16_{i}") for i
in range(4)]
409 self
._part
_32 = [Signal(name
=f
"_part_32_{i}") for i
in range(2)]
410 self
._part
_64 = [Signal(name
=f
"_part_64")]
411 self
._delayed
_part
_8 = [
412 [Signal(name
=f
"_delayed_part_8_{delay}_{i}")
414 for delay
in range(1 + len(self
.register_levels
))]
415 self
._delayed
_part
_16 = [
416 [Signal(name
=f
"_delayed_part_16_{delay}_{i}")
418 for delay
in range(1 + len(self
.register_levels
))]
419 self
._delayed
_part
_32 = [
420 [Signal(name
=f
"_delayed_part_32_{delay}_{i}")
422 for delay
in range(1 + len(self
.register_levels
))]
423 self
._delayed
_part
_64 = [
424 [Signal(name
=f
"_delayed_part_64_{delay}")]
425 for delay
in range(1 + len(self
.register_levels
))]
426 self
._output
_64 = Signal(64)
427 self
._output
_32 = Signal(64)
428 self
._output
_16 = Signal(64)
429 self
._output
_8 = Signal(64)
430 self
._a
_signed
= [Signal(name
=f
"_a_signed_{i}") for i
in range(8)]
431 self
._b
_signed
= [Signal(name
=f
"_b_signed_{i}") for i
in range(8)]
432 self
._not
_a
_term
_8 = Signal(128)
433 self
._neg
_lsb
_a
_term
_8 = Signal(128)
434 self
._not
_b
_term
_8 = Signal(128)
435 self
._neg
_lsb
_b
_term
_8 = Signal(128)
436 self
._not
_a
_term
_16 = Signal(128)
437 self
._neg
_lsb
_a
_term
_16 = Signal(128)
438 self
._not
_b
_term
_16 = Signal(128)
439 self
._neg
_lsb
_b
_term
_16 = Signal(128)
440 self
._not
_a
_term
_32 = Signal(128)
441 self
._neg
_lsb
_a
_term
_32 = Signal(128)
442 self
._not
_b
_term
_32 = Signal(128)
443 self
._neg
_lsb
_b
_term
_32 = Signal(128)
444 self
._not
_a
_term
_64 = Signal(128)
445 self
._neg
_lsb
_a
_term
_64 = Signal(128)
446 self
._not
_b
_term
_64 = Signal(128)
447 self
._neg
_lsb
_b
_term
_64 = Signal(128)
def _part_byte(self, index: int) -> Value:
    """Get the partition-point flag at the upper edge of byte ``index``.

    :param index: byte index in ``-1 <= index < 8``; ``-1`` denotes the
        edge below byte 0.
    :returns: the ``part_pts`` entry at bit ``index * 8 + 8``, or a constant
        true for the two outermost edges.
    """
    # ``part_pts`` only has entries for bits 8, 16, ..., 56. The outer edges
    # (bit 0 and bit 64) always delimit a partition, so they read as enabled.
    if index == -1 or index == 7:
        return C(True, 1)
    assert index >= 0 and index < 8
    return self.part_pts[index * 8 + 8]
455 def elaborate(self
, platform
: Any
) -> Module
:
458 for i
in range(len(self
.part_ops
)):
459 m
.d
.comb
+= self
._delayed
_part
_ops
[0][i
].eq(self
.part_ops
[i
])
460 m
.d
.sync
+= [self
._delayed
_part
_ops
[j
+ 1][i
]
461 .eq(self
._delayed
_part
_ops
[j
][i
])
462 for j
in range(len(self
.register_levels
))]
464 for parts
, delayed_parts
in [(self
._part
_64, self
._delayed
_part
_64),
465 (self
._part
_32, self
._delayed
_part
_32),
466 (self
._part
_16, self
._delayed
_part
_16),
467 (self
._part
_8, self
._delayed
_part
_8)]:
468 byte_count
= 8 // len(parts
)
469 for i
in range(len(parts
)):
470 value
= self
._part
_byte
(i
* byte_count
- 1)
471 for j
in range(i
* byte_count
, (i
+ 1) * byte_count
- 1):
472 value
&= ~self
._part
_byte
(j
)
473 value
&= self
._part
_byte
((i
+ 1) * byte_count
- 1)
474 m
.d
.comb
+= parts
[i
].eq(value
)
475 m
.d
.comb
+= delayed_parts
[0][i
].eq(parts
[i
])
476 m
.d
.sync
+= [delayed_parts
[j
+ 1][i
].eq(delayed_parts
[j
][i
])
477 for j
in range(len(self
.register_levels
))]
480 Signal(16, name
=f
"products_{i}_{j}")
484 for a_index
in range(8):
485 for b_index
in range(8):
486 a
= self
.a
.part(a_index
* 8, 8)
487 b
= self
.b
.part(b_index
* 8, 8)
488 m
.d
.comb
+= products
[a_index
][b_index
].eq(a
* b
)
492 def add_term(value
: Value
,
494 enabled
: Optional
[Value
] = None) -> None:
497 if enabled
is not None:
498 value
= Mux(enabled
, value
, 0)
500 value
= Cat(Repl(C(0, 1), shift
), value
)
503 m
.d
.comb
+= term
.eq(value
)
505 for a_index
in range(8):
506 for b_index
in range(8):
507 term_enabled
: Value
= C(True, 1)
508 min_index
= min(a_index
, b_index
)
509 max_index
= max(a_index
, b_index
)
510 for i
in range(min_index
, max_index
):
511 term_enabled
&= ~self
._part
_byte
(i
)
512 add_term(products
[a_index
][b_index
],
513 8 * (a_index
+ b_index
),
517 a_signed
= self
.part_ops
[i
] != OP_MUL_UNSIGNED_HIGH
518 b_signed
= (self
.part_ops
[i
] == OP_MUL_LOW
) \
519 |
(self
.part_ops
[i
] == OP_MUL_SIGNED_HIGH
)
520 m
.d
.comb
+= self
._a
_signed
[i
].eq(a_signed
)
521 m
.d
.comb
+= self
._b
_signed
[i
].eq(b_signed
)
523 # it's fine to bitwise-or these together since they are never enabled
525 add_term(self
._not
_a
_term
_8 | self
._not
_a
_term
_16
526 | self
._not
_a
_term
_32 | self
._not
_a
_term
_64)
527 add_term(self
._neg
_lsb
_a
_term
_8 | self
._neg
_lsb
_a
_term
_16
528 | self
._neg
_lsb
_a
_term
_32 | self
._neg
_lsb
_a
_term
_64)
529 add_term(self
._not
_b
_term
_8 | self
._not
_b
_term
_16
530 | self
._not
_b
_term
_32 | self
._not
_b
_term
_64)
531 add_term(self
._neg
_lsb
_b
_term
_8 | self
._neg
_lsb
_b
_term
_16
532 | self
._neg
_lsb
_b
_term
_32 | self
._neg
_lsb
_b
_term
_64)
540 self
._neg
_lsb
_a
_term
_8,
542 self
._neg
_lsb
_b
_term
_8,
544 (self
._not
_a
_term
_16,
545 self
._neg
_lsb
_a
_term
_16,
547 self
._neg
_lsb
_b
_term
_16,
549 (self
._not
_a
_term
_32,
550 self
._neg
_lsb
_a
_term
_32,
552 self
._neg
_lsb
_b
_term
_32,
554 (self
._not
_a
_term
_64,
555 self
._neg
_lsb
_a
_term
_64,
557 self
._neg
_lsb
_b
_term
_64,
560 byte_width
= 8 // len(parts
)
561 bit_width
= 8 * byte_width
562 for i
in range(len(parts
)):
563 b_enabled
= parts
[i
] & self
.a
[(i
+ 1) * bit_width
- 1] \
564 & self
._a
_signed
[i
* byte_width
]
565 a_enabled
= parts
[i
] & self
.b
[(i
+ 1) * bit_width
- 1] \
566 & self
._b
_signed
[i
* byte_width
]
568 # for 8-bit values: form a * 0xFF00 by using -a * 0x100, the
569 # negation operation is split into a bitwise not and a +1.
570 # likewise for 16, 32, and 64-bit values.
572 not_a_term
.part(bit_width
* 2 * i
, bit_width
* 2)
574 Cat(Repl(0, bit_width
),
575 ~self
.a
.part(bit_width
* i
, bit_width
)),
578 neg_lsb_a_term
.part(bit_width
* 2 * i
, bit_width
* 2)
579 .eq(Cat(Repl(0, bit_width
), a_enabled
)),
581 not_b_term
.part(bit_width
* 2 * i
, bit_width
* 2)
583 Cat(Repl(0, bit_width
),
584 ~self
.b
.part(bit_width
* i
, bit_width
)),
587 neg_lsb_b_term
.part(bit_width
* 2 * i
, bit_width
* 2)
588 .eq(Cat(Repl(0, bit_width
), b_enabled
))]
590 expanded_part_pts
= PartitionPoints()
591 for i
, v
in self
.part_pts
.items():
592 signal
= Signal(name
=f
"expanded_part_pts_{i*2}")
593 expanded_part_pts
[i
* 2] = signal
594 m
.d
.comb
+= signal
.eq(v
)
596 add_reduce
= AddReduce(terms
,
598 self
.register_levels
,
600 m
.submodules
.add_reduce
= add_reduce
601 m
.d
.comb
+= self
._intermediate
_output
.eq(add_reduce
.output
)
602 m
.d
.comb
+= self
._output
_64.eq(
603 Mux(self
._delayed
_part
_ops
[-1][0] == OP_MUL_LOW
,
604 self
._intermediate
_output
.part(0, 64),
605 self
._intermediate
_output
.part(64, 64)))
607 m
.d
.comb
+= self
._output
_32.part(i
* 32, 32).eq(
608 Mux(self
._delayed
_part
_ops
[-1][4 * i
] == OP_MUL_LOW
,
609 self
._intermediate
_output
.part(i
* 64, 32),
610 self
._intermediate
_output
.part(i
* 64 + 32, 32)))
612 m
.d
.comb
+= self
._output
_16.part(i
* 16, 16).eq(
613 Mux(self
._delayed
_part
_ops
[-1][2 * i
] == OP_MUL_LOW
,
614 self
._intermediate
_output
.part(i
* 32, 16),
615 self
._intermediate
_output
.part(i
* 32 + 16, 16)))
617 m
.d
.comb
+= self
._output
_8.part(i
* 8, 8).eq(
618 Mux(self
._delayed
_part
_ops
[-1][i
] == OP_MUL_LOW
,
619 self
._intermediate
_output
.part(i
* 16, 8),
620 self
._intermediate
_output
.part(i
* 16 + 8, 8)))
622 m
.d
.comb
+= self
.output
.part(i
* 8, 8).eq(
623 Mux(self
._delayed
_part
_8[-1][i
]
624 | self
._delayed
_part
_16[-1][i
// 2],
625 Mux(self
._delayed
_part
_8[-1][i
],
626 self
._output
_8.part(i
* 8, 8),
627 self
._output
_16.part(i
* 8, 8)),
628 Mux(self
._delayed
_part
_32[-1][i
// 4],
629 self
._output
_32.part(i
* 8, 8),
630 self
._output
_64.part(i
* 8, 8))))
634 if __name__
== "__main__":
638 m
._intermediate
_output
,
641 *m
.part_pts
.values()])