switch to exact version of cython
[ieee754fpu.git] / src / ieee754 / part_shift / part_shift_scalar.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3
4 """
5 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
6
7 dynamically partitionable shifter. Only the operand to be shifted can
8 be partitioned, the amount to shift by *must* be a scalar
9
10 See:
11
12 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
13 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
14 """
15 from nmigen import Signal, Module, Elaboratable, Cat, Mux
16 from ieee754.part_mul_add.partpoints import PartitionPoints
17 from ieee754.part_shift.part_shift_dynamic import ShifterMask
18 from ieee754.part_shift.bitrev import GatedBitReverse
19 import math
20
21
class PartitionedScalarShift(Elaboratable):
    """Dynamically partitionable shifter with a scalar shift amount.

    Only the operand being shifted (``data``) is partitioned; the shift
    amount (``shifter``) is a single scalar applied to every partition.

    The datapath only ever shifts left.  When ``shift_right`` is asserted
    the input, the partition gates and the result are each routed through
    a ``GatedBitReverse`` whose ``reverse_en`` is driven by ``shift_right``
    (presumably reversing bit order when enabled -- confirm against
    ``bitrev.py``), so the same left-shift logic serves both directions.

    Attributes:
        width: bit width of ``data`` and ``output``.
        partition_points: ``PartitionPoints`` built from the constructor
            argument.
        data: input signal, ``width`` bits.
        shiftbits: ``ceil(log2(width))``, the width of ``shifter``.
        shifter: scalar shift amount, ``shiftbits`` bits.
        output: result signal, ``width`` bits.
        shift_right: 1-bit signal; whether to shift right.
    """

    def __init__(self, width, partition_points):
        """Create the shifter's interface signals.

        Args:
            width: bit width of the operand to be shifted.
            partition_points: passed to ``PartitionPoints``; its keys are
                used as the bit positions at which ``data`` is split.
        """
        self.width = width
        self.partition_points = PartitionPoints(partition_points)

        self.data = Signal(width, reset_less=True)
        self.shiftbits = math.ceil(math.log2(width))
        self.shifter = Signal(self.shiftbits, reset_less=True)
        self.output = Signal(width, reset_less=True)
        self.shift_right = Signal(reset_less=True)  # Whether to shift right

    def elaborate(self, platform):
        """Build and return the combinatorial shifter ``Module``.

        Args:
            platform: unused here; part of the ``Elaboratable`` interface.

        Returns:
            The nmigen ``Module`` implementing the shifter.
        """
        m = Module()
        comb = m.d.comb
        width = self.width
        pwid = self.partition_points.get_max_partition_count(width) - 1
        shiftbits = self.shiftbits
        gates = self.partition_points.as_sig()

        # Input, output and gate paths all share the same reverse enable
        # so that a right shift can reuse the left-shift datapath.
        m.submodules.in_br = in_br = GatedBitReverse(self.data.width)
        comb += in_br.data.eq(self.data)
        comb += in_br.reverse_en.eq(self.shift_right)

        m.submodules.out_br = out_br = GatedBitReverse(self.data.width)
        comb += out_br.reverse_en.eq(self.shift_right)
        comb += self.output.eq(out_br.output)

        m.submodules.gate_br = gate_br = GatedBitReverse(pwid)
        comb += gate_br.data.eq(gates)
        comb += gate_br.reverse_en.eq(self.shift_right)

        # Split [0, width) into consecutive (start, end) intervals at the
        # partition points; the final interval ends at ``width``.
        bounds = list(self.partition_points.keys()) + [width]
        intervals = list(zip([0] + bounds[:-1], bounds))
        outputs = [out_br.data[start:end] for start, end in intervals]

        # The first interval's size sets the minimum number of shift bits
        # handed to every ShifterMask.
        first_start, first_end = intervals[0]
        min_bits = math.ceil(math.log2(first_end - first_start))

        # One shift-amount mask per interval.  If the gate just below an
        # interval is set, the interval uses its own freshly computed
        # mask; otherwise it inherits the mask of the interval below it.
        shifter_masks = []
        for i, (start, _end) in enumerate(intervals):
            max_bits = math.ceil(math.log2(width - start))
            sm_mask = Signal(shiftbits, name="sm_mask%d" % i,
                             reset_less=True)
            if pwid - i != 0:
                sm = ShifterMask(pwid - i, shiftbits, max_bits, min_bits)
                comb += sm.gates.eq(gate_br.output[i:pwid])
                comb += sm_mask.eq(sm.mask)
                setattr(m.submodules, "sm%d" % i, sm)
            else:
                # having a 0 width signal seems to give the proof issues
                # this seems to fix it
                comb += sm_mask.eq((1 << min_bits) - 1)

            if i != 0:
                shifter_mask = Signal(shiftbits, name="shifter_mask%d" % i,
                                      reset_less=True)
                comb += shifter_mask.eq(Mux(gate_br.output[i - 1],
                                            sm_mask,
                                            shifter_masks[i - 1]))
                shifter_masks.append(shifter_mask)
            else:
                shifter_masks.append(sm_mask)

        # Shift each interval left by the masked shift amount.  The result
        # is written from bit ``start`` upwards of a full-width signal so
        # bits shifted out of the interval land in higher positions.
        shiftparts = []
        for i, (start, end) in enumerate(intervals):
            sp = Signal(width, name="sp%d" % i, reset_less=True)
            _shifter = Signal(self.shifter.width, name="shifter%d" % i,
                              reset_less=True)
            comb += _shifter.eq(self.shifter & shifter_masks[i])
            comb += sp[start:].eq(in_br.output[start:end] << _shifter)
            shiftparts.append(sp)

        # Chain the partial results: each interval ORs in the accumulated
        # value from below unless the gate between them is set, in which
        # case the contribution from below is replaced by 0.
        prev = None
        for i, ((start, end), part, out) in enumerate(
                zip(intervals, shiftparts, outputs)):
            if i == 0:
                intermed = part
            else:
                intermed = part | Mux(gate_br.output[i - 1], 0, prev)
            comb += out.eq(intermed[start:end])
            prev = intermed

        return m