redo grev
[nmutil.git] / src / nmutil / grev.py
1 # SPDX-License-Identifier: LGPL-3-or-later
2 # Copyright 2021 Jacob Lifshay programmerjake@gmail.com
3 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
4
5 # Funded by NLnet Assure Programme 2021-02-052, https://nlnet.nl/assure part
6 # of Horizon 2020 EU Programme 957073.
7
8 r"""Generalized bit-reverse.
9
10 https://bugs.libre-soc.org/show_bug.cgi?id=755
11
12 A generalized bit-reverse is the following operation:
13 grev(input, chunk_sizes):
14 for i in range(input.width):
15 j = i XOR chunk_sizes
16 output bit i = input bit j
17 return output
18
19 This is useful because many bit/byte reverse operations can be created by
20 setting `chunk_sizes` to different values. Some examples for a 64-bit
21 `grev` operation:
22 * `0b111111` -- reverse all bits in the 64-bit word
23 * `0b111000` -- reverse bytes in the 64-bit word
24 * `0b011000` -- reverse bytes in each 32-bit word independently
25 * `0b110000` -- reverse order of 16-bit words
26
27 This is implemented by using a series of `log2_width`
28 `width`-bit wide 2:1 muxes, arranged just like a butterfly network:
29 https://en.wikipedia.org/wiki/Butterfly_network
30
31 To compute `out = grev(inp, 0bxyz)`, where `x`, `y`, and `z` are single bits,
32 the following permutation network is used:
33
34 inp[0] inp[1] inp[2] inp[3] inp[4] inp[5] inp[6] inp[7]
35 | | | | | | | |
36 the value here is | | | | | | | |
37 grev(inp, 0b000): | | | | | | | |
38 | | | | | | | |
39 + + + + + + + +
40 |\ /| |\ /| |\ /| |\ /|
41 | \ / | | \ / | | \ / | | \ / |
42 | \ / | | \ / | | \ / | | \ / |
43 swap 1-bit words: | X | | X | | X | | X |
44 | / \ | | / \ | | / \ | | / \ |
45 | / \ | | / \ | | / \ | | / \ |
46 z--Mux z--Mux z--Mux z--Mux z--Mux z--Mux z--Mux z--Mux
47 | | | | | | | |
48 the value here is | | | | | | | |
49 grev(inp, 0b00z): | | | | | | | |
50 | | | | | | | |
51 | | +-----|-------+ | | +-----|-------+
52 | +-----|-|-----+ | | +-----|-|-----+ |
53 | | | | | | | | | | | |
54 swap 2-bit words: | | +-|-----|-----+ | | | +-|-----|-----+ |
55 +-|-----|-|---+ | | | +-|-----|-|---+ | | |
56 | | | | | | | | | | | | | | | |
57 | / | / \ | \ | | / | / \ | \ |
58 y--Mux y--Mux y--Mux y--Mux y--Mux y--Mux y--Mux y--Mux
59 | | | | | | | |
60 the value here is | | | | | | | |
61 grev(inp, 0b0yz): | | | | | | | |
62 | | | | | | | |
63 | | | | +-----|-------|-------|-------+
64 | | | +-----|-|-----|-------|-------+ |
65 | | +-----|-|-----|-|-----|-------+ | |
66 | +-----|-|-----|-|-----|-|-----+ | | |
67 swap 4-bit words: | | | | | | | | | | | |
68 | | | | | | +-|-----|-------|-------|-----+ |
69 | | | | +-|-----|-|-----|-------|-----+ | | |
70 | | +-|-----|-|-----|-|-----|-----+ | | | | |
71 +-|-----|-|-----|-|-----|-|---+ | | | | | | |
72 | | | | | | | | | | | | | | | |
73 | / | / | / | / \ | \ | \ | \ |
74 x--Mux x--Mux x--Mux x--Mux x--Mux x--Mux x--Mux x--Mux
75 | | | | | | | |
76 the value here is | | | | | | | |
77 grev(inp, 0bxyz): | | | | | | | |
78 | | | | | | | |
79 out[0] out[1] out[2] out[3] out[4] out[5] out[6] out[7]
80 """
81
82 from nmigen.hdl.ast import Signal, Mux, Cat
83 from nmigen.hdl.ast import Assert
84 from nmigen.hdl.dsl import Module
85 from nmigen.hdl.ir import Elaboratable
86 import string
87
88
89 def grev(inval, chunk_sizes, log2_width):
90 """Python reference implementation of generalized bit-reverse.
91 See `GRev` for documentation.
92 """
93 # mask inputs into range
94 inval &= 2 ** 2 ** log2_width - 1
95 chunk_sizes &= 2 ** log2_width - 1
96 # core algorithm:
97 retval = 0
98 for i in range(2 ** log2_width):
99 # don't use `if` so this can be used with nmigen values
100 bit = (inval & (1 << i)) != 0
101 retval |= bit << (i ^ chunk_sizes)
102 return retval
103
104
105 class GRev(Elaboratable):
106 """Generalized bit-reverse.
107
108 See the module's documentation for a description of generalized
109 bit-reverse, as well as the permutation network created by this class.
110
111 Attributes:
112 log2_width: int
113 see __init__'s docs.
114 msb_first: bool
115 see __init__'s docs.
116 width: int
117 the input/output width of the grev operation. The value is
118 `2 ** self.log2_width`.
119 input: Signal with width=self.width
120 the input value of the grev operation.
121 chunk_sizes: Signal with width=self.log2_width
122 the input that describes which bits get swapped. See the module docs
123 for additional details.
124 output: Signal with width=self.width
125 the output value of the grev operation.
126 """
127
128 def __init__(self, log2_width, msb_first=False):
129 """Create a `GRev` instance.
130
131 log2_width: int
132 the base-2 logarithm of the input/output width of the grev
133 operation.
134 msb_first: bool
135 If `msb_first` is True, then the order will be the reverse of the
136 standard order -- swapping adjacent 8-bit words, then 4-bit words,
137 then 2-bit words, then 1-bit words -- using the bits of
138 `chunk_sizes` from MSB to LSB.
139 If `msb_first` is False (the default), then the order will be the
140 standard order -- swapping adjacent 1-bit words, then 2-bit words,
141 then 4-bit words, then 8-bit words -- using the bits of
142 `chunk_sizes` from LSB to MSB.
143 """
144 self.log2_width = log2_width
145 self.msb_first = msb_first
146 self.width = 1 << log2_width
147 self.input = Signal(self.width)
148 self.chunk_sizes = Signal(log2_width)
149 self.output = Signal(self.width)
150
151 # internal signals exposed for unit tests, should be ignored by
152 # external users. The signals are created in the constructor because
153 # that's where all class member variables should *always* be created.
154 # If we were to create the members in elaborate() instead, it would
155 # just make the class very confusing to use.
156 #
157 # `_intermediates[step_count]`` is the value after `step_count` steps
158 # of muxing. e.g. (for `msb_first == False`) `_intermediates[4]` is the
159 # result of 4 steps of muxing, being the value `grev(inp,0b00wxyz)`.
160 self._intermediates = [self.__inter(i) for i in range(log2_width + 1)]
161
162 def _get_cs_bit_index(self, step_index):
163 """get the index of the bit of `chunk_sizes` that this step should mux
164 based off of."""
165 assert 0 <= step_index < self.log2_width
166 if self.msb_first:
167 # reverse so we start from the MSB, producing intermediate values
168 # like, for `step_index == 4`, `0buvwx00` rather than `0b00wxyz`
169 return self.log2_width - step_index - 1
170 return step_index
171
172 def __inter(self, step_count):
173 """make a signal with a name like `grev(inp,0b000xyz)` to match the
174 diagram in the module-level docs."""
175 # make the list of bits in LSB to MSB order
176 chunk_sizes_bits = ['0'] * self.log2_width
177 # for all steps already completed
178 for step_index in range(step_count):
179 bit_num = self._get_cs_bit_index(step_index)
180 ch = string.ascii_lowercase[-1 - bit_num] # count from z to a
181 chunk_sizes_bits[bit_num] = ch
182 # reverse cuz text is MSB first
183 chunk_sizes_val = '0b' + ''.join(reversed(chunk_sizes_bits))
184 # name works according to Verilog's rules for escaped identifiers cuz
185 # it has no spaces
186 name = f"grev(inp,{chunk_sizes_val})"
187 return Signal(self.width, name=name)
188
189 def __get_permutation(self, step_index):
190 """get the bit permutation for the current step. the returned value is
191 a list[int] where `retval[i] == j` means that this step's input bit `i`
192 goes to this step's output bit `j`."""
193 # we can extract just the latest bit for this step, since the previous
194 # step effectively has it's value's grev arg as `0b000xyz`, and this
195 # step has it's value's grev arg as `0b00wxyz`, so we only need to
196 # compute `grev(prev_step_output,0b00w000)` to get
197 # `grev(inp,0b00wxyz)`. `cur_chunk_sizes` is the `0b00w000`.
198 cur_chunk_sizes = 1 << self._get_cs_bit_index(step_index)
199 # compute bit permutation for `grev(...,0b00w000)`.
200 return [i ^ cur_chunk_sizes for i in range(self.width)]
201
202 def _sigs_and_expected(self, inp, chunk_sizes):
203 """the intermediate signals and the expected values, based off of the
204 passed-in `inp` and `chunk_sizes`."""
205 # we accumulate a mask of which chunk_sizes bits we have accounted for
206 # so far
207 chunk_sizes_mask = 0
208 for step_count, intermediate in enumerate(self._intermediates):
209 # mask out chunk_sizes to get the value
210 cur_chunk_sizes = chunk_sizes & chunk_sizes_mask
211 expected = grev(inp, cur_chunk_sizes, self.log2_width)
212 yield (intermediate, expected)
213 # if step_count is in-range for being a valid step_index
214 if step_count < self.log2_width:
215 # add current step's bit to the mask
216 chunk_sizes_mask |= 1 << self._get_cs_bit_index(step_count)
217 assert chunk_sizes_mask == 2 ** self.log2_width - 1, \
218 "should have got all the bits in chunk_sizes"
219
220 def elaborate(self, platform):
221 m = Module()
222
223 # value after zero steps is just the input
224 m.d.comb += self._intermediates[0].eq(self.input)
225
226 for step_index in range(self.log2_width):
227 step_inp = self._intermediates[step_index]
228 step_out = self._intermediates[step_index + 1]
229 # get permutation for current step
230 permutation = self.__get_permutation(step_index)
231 # figure out which `chunk_sizes` bit we want to pay attention to
232 # for this step.
233 sel = self.chunk_sizes[self._get_cs_bit_index(step_index)]
234 for in_index, out_index in enumerate(permutation):
235 # use in_index so we get the permuted bit
236 permuted_bit = step_inp[in_index]
237 # use out_index so we copy the bit straight thru
238 straight_bit = step_inp[out_index]
239 bit = Mux(sel, permuted_bit, straight_bit)
240 m.d.comb += step_out[out_index].eq(bit)
241 # value after all steps is just the output
242 m.d.comb += self.output.eq(self._intermediates[-1])
243
244 if platform != 'formal':
245 return m
246
247 # formal test comparing directly against the (simpler) version
248 m.d.comb += Assert(self.output == grev(self.input,
249 self.chunk_sizes,
250 self.log2_width))
251
252 for value, expected in self._sigs_and_expected(self.input,
253 self.chunk_sizes):
254 m.d.comb += Assert(value == expected)
255 return m
256
257 def ports(self):
258 return [self.input, self.chunk_sizes, self.output]
259
260
261 # useful to see what is going on:
262 # python3 src/nmutil/test/test_grev.py
263 # yosys <<<"read_ilang sim_test_out/__main__.TestGrev.test_small/0.il; proc; clean -purge; show top"