modify reorder_bits to copy the MSB of the partition to each bit
[ieee754fpu.git] / src / ieee754 / part_cmp / eq_gt_ge.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3
4 """
5 Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
6
dynamically-partitionable "comparison" class, directly equivalent
to Signal.__eq__, Signal.__gt__ and Signal.__ge__ (selected by an
opcode) except SIMD-partitionable
9
10 See:
11
12 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/eq
13 * http://bugs.libre-riscv.org/show_bug.cgi?id=132
14 """
15
16 from nmigen import Signal, Module, Elaboratable, Cat, C, Mux, Repl
17 from nmigen.back.pysim import Simulator, Delay, Settle
18 from nmigen.cli import main, rtlil
19
20 from ieee754.part_mul_add.partpoints import PartitionPoints
21 from ieee754.part_cmp.gt_combiner import GTCombiner
22 from ieee754.part_cmp.reorder_results import ReorderResults
23 from ieee754.part_cmp.ripple import RippleLSB
24
25
class PartitionedEqGtGe(Elaboratable):
    """Dynamically-partitioned EQ / GT / GE comparison of ``a`` and ``b``.

    Expansion of the partitioned equals module to handle Greater
    Than and Greater than or Equal to.  ``a`` and ``b`` are split into
    chunks at the partition points, each chunk is compared on its own,
    and the per-chunk results are merged by a ``GTCombiner`` and then
    passed through ``ReorderResults`` to produce ``output``.

    The function being evaluated is selected by the opcode signal, where:

    * opcode 0b00 - EQ
    * opcode 0b01 - GT
    * opcode 0b10 - GE

    Opcode 0b11 has no case in the switch, so ``aux_input`` and ``gt_en``
    are not assigned for it.
    """
    EQ = C(0b00, 2)
    GT = C(0b01, 2)
    GE = C(0b10, 2)

    def __init__(self, width, partition_points):
        """Create a ``PartitionedEqGtGe`` operator

        :param width: bit width of the ``a`` and ``b`` inputs
        :param partition_points: partition points; passed to the
                                 ``PartitionPoints`` constructor
        :raises AssertionError: if the partition points do not fit
                                in ``width``
        """
        self.width = width
        self.a = Signal(width, reset_less=True)
        self.b = Signal(width, reset_less=True)
        self.opcode = Signal(2)
        self.partition_points = PartitionPoints(partition_points)
        # one result bit per chunk: N partition points give N+1 chunks
        self.mwidth = len(self.partition_points)+1
        self.output = Signal(self.mwidth, reset_less=True)
        # NOTE: this must NOT be written as ``assert (cond, msg)``: that
        # asserts a non-empty tuple, which is always true, so the check
        # would silently never fire.
        assert self.partition_points.fits_in_width(width), \
            "partition_points doesn't fit in width"

    def elaborate(self, platform):
        """Build and return the comparison logic as a ``Module``."""
        m = Module()
        comb = m.d.comb
        m.submodules.gtc = gtc = GTCombiner(self.mwidth)

        m.submodules.reorder = reorder = ReorderResults(self.mwidth)

        # make a series of "eqs" and "gts", splitting a and b into
        # partition chunks
        eqs = Signal(self.mwidth, reset_less=True)
        eql = []
        gts = Signal(self.mwidth, reset_less=True)
        gtl = []

        # chunk boundaries: every partition point, then the full width so
        # that the final chunk runs up to the top bit.
        # (assumes the partition point keys iterate in ascending order)
        keys = list(self.partition_points.keys()) + [self.width]
        start = 0
        for end in keys:
            eql.append(self.a[start:end] == self.b[start:end])
            gtl.append(self.a[start:end] > self.b[start:end])
            start = end  # for next time round loop
        comb += eqs.eq(Cat(*eql))
        comb += gts.eq(Cat(*gtl))

        # control the constant injected into the partition
        # next to a closed gate
        aux_input = Signal()
        # enable or disable the gt input for the gt partition combiner
        gt_en = Signal()

        with m.Switch(self.opcode):
            with m.Case(0b00):  # equals
                comb += aux_input.eq(1)
                comb += gt_en.eq(0)
            with m.Case(0b01):  # greater than
                comb += aux_input.eq(0)
                comb += gt_en.eq(1)
            with m.Case(0b10):  # greater than or equal to
                comb += aux_input.eq(1)
                comb += gt_en.eq(1)

        # feed the per-chunk comparisons and the partition gates into
        # the combiner, and capture its outputs
        results = Signal(self.mwidth, reset_less=True)
        comb += gtc.gates.eq(self.partition_points.as_sig())
        comb += gtc.eqs.eq(eqs)
        comb += gtc.gts.eq(gts)
        comb += gtc.aux_input.eq(aux_input)
        comb += gtc.gt_en.eq(gt_en)
        comb += results.eq(gtc.outputs)

        # pass the combined results through the reorder stage, which is
        # given the same partition gates
        comb += reorder.results_in.eq(results)
        comb += reorder.gates.eq(self.partition_points.as_sig())

        comb += self.output.eq(reorder.output)

        return m

    def ports(self):
        """Return the input and output signals of this module as a list."""
        return [self.a, self.b, self.opcode,
                self.partition_points.as_sig(),
                self.output]
110
if __name__ == "__main__":
    # Small self-contained simulation: drive a 16-bit comparator with a
    # few stimulus values, print the output and dump a VCD trace.
    from ieee754.part_mul_add.partpoints import make_partition

    top = Module()
    mask = Signal(4)
    egg = PartitionedEqGtGe(16, make_partition(mask, 16))
    top.submodules.egg = egg

    sim = Simulator(top)

    def drive(mask_value, a_value, b_value):
        """Set the partition mask and both comparator inputs."""
        yield mask.eq(mask_value)
        yield egg.a.eq(a_value)
        yield egg.b.eq(b_value)

    def show_output():
        """Read the comparator output and print it in binary."""
        value = yield egg.output
        print("out", bin(value))

    def process():
        # first stimulus: a != b, opcode 0b00 (equals)
        yield from drive(0b010, 0xf000, 0)
        yield egg.opcode.eq(0b00)
        yield Delay(1e-6)
        yield from show_output()

        # second stimulus: a == b with a different mask (not printed)
        yield from drive(0b111, 0x0000, 0)
        yield Delay(1e-6)

        # third stimulus: a == b with the original mask
        yield from drive(0b010, 0x0000, 0)
        yield Delay(1e-6)
        yield from show_output()

    sim.add_process(process)
    vcd_writer = sim.write_vcd("eq_gt_ge.vcd", "eq_gt_ge.gtkw",
                               traces=egg.ports())
    with vcd_writer:
        sim.run()