1 # SPDX-License-Identifier: LGPL-3-or-later
2 # Copyright 2022 Jacob Lifshay programmerjake@gmail.com
4 # Funded by NLnet Assure Programme 2021-02-052, https://nlnet.nl/assure part
5 # of Horizon 2020 EU Programme 957073.
7 """ Carry-less Division and Remainder.
9 https://bugs.libre-soc.org/show_bug.cgi?id=784
12 from dataclasses
import dataclass
, field
, fields
13 from nmigen
.hdl
.ir
import Elaboratable
14 from nmigen
.hdl
.ast
import Signal
, Value
15 from nmigen
.hdl
.dsl
import Module
16 from nmutil
.singlepipe
import ControlBase
19 def equal_leading_zero_count_reference(a
, b
, width
):
20 """checks if `clz(a) == clz(b)`.
21 Reference code for algorithm used in `EqualLeadingZeroCount`.
23 assert isinstance(width
, int) and 0 <= width
24 assert isinstance(a
, int) and 0 <= a
< (1 << width
)
25 assert isinstance(b
, int) and 0 <= b
< (1 << width
)
26 eq
= True # both have no leading zeros so far...
27 for i
in range(width
):
30 # `both_ones` is set if both have no leading zeros so far
31 both_ones
= a_bit
& b_bit
32 # `different` is set if there are a different number of leading
34 different
= a_bit
!= b_bit
40 pass # propagate from lower bits
44 class EqualLeadingZeroCount(Elaboratable
):
45 """checks if `clz(a) == clz(b)`.
49 the width in bits of `a` and `b`.
50 a: Signal of width `width`
52 b: Signal of width `width`
54 out: Signal of width `1`
55 output, set if the number of leading zeros in `a` is the same as in
59 def __init__(self
, width
):
60 assert isinstance(width
, int)
62 self
.a
= Signal(width
)
63 self
.b
= Signal(width
)
66 def elaborate(self
, platform
):
67 # the operation is converted into calculation of the carry-out of a
68 # binary addition, allowing FPGAs to re-use their specialized
69 # carry-propagation logic. This should be simplified by yosys to
70 # remove the extraneous xor gates from addition when targeting
71 # FPGAs/ASICs, so no efficiency is lost.
73 # see `equal_leading_zero_count_reference` for a Python version of
74 # the algorithm, but without conversion to carry-propagation.
75 # note that it's possible to do all the bits at once: a for-loop
76 # (unlike in the reference-code) is not necessary
79 both_ones
= Signal(self
.width
)
80 different
= Signal(self
.width
)
82 # build `both_ones` and `different` such that:
83 # for every bit index `i`:
84 # * if `both_ones[i]` is set, then both addends bits at index `i` are
85 # set in order to set the carry bit out, since `cin + 1 + 1` always
87 # * if `different[i]` is set, then both addends bits at index `i` are
88 # zeros in order to clear the carry bit out, since `cin + 0 + 0`
89 # never has a carry out.
90 # * otherwise exactly one of the addends bits at index `i` is set and
91 # the other is clear in order to propagate the carry bit from
92 # less significant bits, since `cin + 1 + 0` has a carry out that is
95 # `both_ones` is set if both have no leading zeros so far
96 m
.d
.comb
+= both_ones
.eq(self
.a
& self
.b
)
97 # `different` is set if there are a different number of leading
99 m
.d
.comb
+= different
.eq(self
.a ^ self
.b
)
101 # now [ab]use add: the last bit [carry-out] is the result
102 csum
= Signal(self
.width
+ 1)
103 carry_in
= 1 # both have no leading zeros so far, so set carry in
104 m
.d
.comb
+= csum
.eq(both_ones
+ (~different
) + carry_in
)
105 m
.d
.comb
+= self
.out
.eq(csum
[self
.width
]) # out is carry-out
110 @dataclass(frozen
=True, unsafe_hash
=True)
115 def __post_init__(self
):
116 assert self
.n_width
>= self
.width
> 0
123 def step_range(self
):
124 return range(self
.done_step
+ 1)
127 @dataclass(frozen
=True, eq
=False)
131 d
: Signal
= field(init
=False)
132 r
: Signal
= field(init
=False)
133 q
: Signal
= field(init
=False)
134 step
: Signal
= field(init
=False)
136 def __init__(self
, shape
, *, name
=None, src_loc_at
=0):
137 assert isinstance(shape
, CLDivRemShape
)
139 name
= Signal(src_loc_at
=1 + src_loc_at
).name
140 assert isinstance(name
, str)
141 d
= Signal(2 * shape
.width
, name
=f
"{name}_d")
142 r
= Signal(shape
.n_width
, name
=f
"{name}_r")
143 q
= Signal(shape
.width
, name
=f
"{name}_q")
144 step
= Signal(shape
.width
, name
=f
"{name}_step")
145 object.__setattr
__(self
, "shape", shape
)
146 object.__setattr
__(self
, "name", name
)
147 object.__setattr
__(self
, "d", d
)
148 object.__setattr
__(self
, "r", r
)
149 object.__setattr
__(self
, "q", q
)
150 object.__setattr
__(self
, "step", step
)
153 assert isinstance(rhs
, CLDivRemState
)
154 for f
in fields(CLDivRemState
):
155 if f
.name
in ("shape", "name"):
157 l
= getattr(self
, f
.name
)
158 r
= getattr(rhs
, f
.name
)
162 def like(other
, *, name
=None, src_loc_at
=0):
163 assert isinstance(other
, CLDivRemState
)
164 return CLDivRemState(other
.shape
, name
=name
, src_loc_at
=1 + src_loc_at
)
168 return self
.will_be_done_after(steps
=0)
170 def will_be_done_after(self
, steps
):
171 """ Returns True if this state will be done after
172 another `steps` passes through `set_to_next`."""
173 assert isinstance(steps
, int) and steps
>= 0
174 return self
.step
>= max(0, self
.shape
.done_step
- steps
)
176 def set_to_initial(self
, m
, n
, d
):
177 assert isinstance(m
, Module
)
179 self
.d
.eq(Value
.cast(d
) << self
.shape
.width
),
185 def set_to_next(self
, m
, state_in
):
186 assert isinstance(m
, Module
)
187 assert isinstance(state_in
, CLDivRemState
)
188 assert state_in
.shape
== self
.shape
189 assert self
is not state_in
, "a.set_to_next(m, a) is not allowed"
191 equal_leading_zero_count
= EqualLeadingZeroCount(self
.shape
.n_width
)
192 # can't name submodule since it would conflict if this function is
193 # called multiple times in a Module
194 m
.submodules
+= equal_leading_zero_count
196 with m
.If(state_in
.done
):
197 m
.d
.comb
+= self
.eq(state_in
)
200 self
.step
.eq(state_in
.step
+ 1),
201 self
.d
.eq(state_in
.d
>> 1),
202 equal_leading_zero_count
.a
.eq(self
.d
),
203 equal_leading_zero_count
.b
.eq(state_in
.r
),
205 d_top
= self
.d
[self
.shape
.n_width
:]
206 with m
.If(equal_leading_zero_count
.out
& (d_top
== 0)):
208 self
.r
.eq(state_in
.r ^ self
.d
),
209 self
.q
.eq((state_in
.q
<< 1) |
1),
213 self
.r
.eq(state_in
.r
),
214 self
.q
.eq(state_in
.q
<< 1),
218 class CLDivRemInputData
:
219 def __init__(self
, shape
):
220 assert isinstance(shape
, CLDivRemShape
)
222 self
.n
= Signal(shape
.n_width
)
223 self
.d
= Signal(shape
.width
)
226 """ Get member signals. """
231 """ Assign member signals. """
238 class CLDivRemOutputData
:
239 def __init__(self
, shape
):
240 assert isinstance(shape
, CLDivRemShape
)
242 self
.q
= Signal(shape
.width
)
243 self
.r
= Signal(shape
.width
)
246 """ Get member signals. """
251 """ Assign member signals. """
258 class CLDivRemFSMStage(ControlBase
):
259 """carry-less div/rem
265 number of steps that should be taken per clock cycle
267 input. true when the data inputs (`n` and `d`) are valid.
268 data transfer in occurs when `in_valid & in_ready`.
270 output. true when this FSM is ready to accept input.
271 data transfer in occurs when `in_valid & in_ready`.
272 n: Signal(shape.n_width)
273 numerator in, the value must be small enough that `q` and `r` don't
274 overflow. having `n_width == width` is sufficient.
275 d: Signal(shape.width)
276 denominator in, must be non-zero.
277 q: Signal(shape.width)
279 r: Signal(shape.width)
282 output. true when the data outputs (`q` and `r`) are valid
283 (or are junk because the inputs were out of range).
284 data transfer out occurs when `out_valid & out_ready`.
286 input. true when the output can be read.
287 data transfer out occurs when `out_valid & out_ready`.
290 def __init__(self
, pspec
, shape
, *, steps_per_clock
=4):
291 assert isinstance(shape
, CLDivRemShape
)
292 assert isinstance(steps_per_clock
, int) and steps_per_clock
>= 1
294 self
.steps_per_clock
= steps_per_clock
295 self
.pspec
= pspec
# store now: used in ispec and ospec
296 super().__init
__(stage
=self
)
297 self
.empty
= Signal(reset
=1)
298 self
.saved_state
= CLDivRemState(shape
)
301 return CLDivRemInputData(self
.shape
)
304 return CLDivRemOutputData(self
.shape
)
306 def setup(self
, m
, i
):
309 def elaborate(self
, platform
):
310 m
= super().elaborate(platform
)
311 i_data
: CLDivRemInputData
= self
.p
.i_data
312 o_data
: CLDivRemOutputData
= self
.n
.o_data
314 # TODO: handle cancellation
316 state_will_be_done
= self
.saved_state
.will_be_done_after(
317 self
.steps_per_clock
)
318 m
.d
.comb
+= self
.n
.o_valid
.eq(~self
.empty
& state_will_be_done
)
319 m
.d
.comb
+= self
.p
.o_ready
.eq(self
.empty
)
322 return CLDivRemState(self
.shape
, name
=f
"next_chain_{i}")
323 next_chain
= [make_nc(i
) for i
in range(self
.steps_per_clock
+ 1)]
324 for i
in range(self
.steps_per_clock
):
325 next_chain
[i
+ 1].set_to_next(m
, next_chain
[i
])
326 m
.d
.sync
+= self
.saved_state
.eq(next_chain
[-1])
327 m
.d
.comb
+= o_data
.q
.eq(next_chain
[-1].q
)
328 m
.d
.comb
+= o_data
.r
.eq(next_chain
[-1].r
)
330 with m
.If(self
.empty
):
331 next_chain
[0].set_to_initial(m
, n
=i_data
.n
, d
=i_data
.d
)
332 with m
.If(self
.p
.i_valid
):
333 m
.d
.sync
+= self
.empty
.eq(0)
335 m
.d
.comb
+= next_chain
[0].eq(self
.saved_state
)
336 with m
.If(self
.n
.i_ready
& self
.n
.o_valid
):
337 m
.d
.sync
+= self
.empty
.eq(1)