From: Luke Kenneth Casson Leighton Date: Thu, 2 May 2019 12:42:57 +0000 (+0100) Subject: move add to ieee754 directory X-Git-Tag: ls180-24jan2020~1090 X-Git-Url: https://git.libre-soc.org/?p=ieee754fpu.git;a=commitdiff_plain;h=58e455d3bd9b43d076468bf2b7b1f0784e5c4fd2;hp=6bff1a997f3846872cf489c24b5c01426c4dc97c move add to ieee754 directory --- diff --git a/src/add/concurrentunit.py b/src/add/concurrentunit.py deleted file mode 100644 index c0053c8b..00000000 --- a/src/add/concurrentunit.py +++ /dev/null @@ -1,74 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from math import log -from nmigen import Module -from nmigen.cli import main, verilog - -from singlepipe import PassThroughStage -from multipipe import CombMuxOutPipe -from multipipe import PriorityCombMuxInPipe - -from fpcommon.getop import FPADDBaseData -from fpcommon.denorm import FPSCData -from fpcommon.pack import FPPackData -from fpcommon.normtopack import FPNormToPack -from fpadd.specialcases import FPAddSpecialCasesDeNorm -from fpadd.addstages import FPAddAlignSingleAdd - - -def num_bits(n): - return int(log(n) / log(2)) - -class FPADDInMuxPipe(PriorityCombMuxInPipe): - def __init__(self, num_rows, iospecfn): - self.num_rows = num_rows - stage = PassThroughStage(iospecfn) - PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows) - - -class FPADDMuxOutPipe(CombMuxOutPipe): - def __init__(self, num_rows, iospecfn): - self.num_rows = num_rows - stage = PassThroughStage(iospecfn) - CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows) - - -class ReservationStations: - """ Reservation-Station pipeline - - Input: num_rows - number of input and output Reservation Stations - - Requires: the addition of an "alu" object, an i_specfn and an o_specfn - - * fan-in on inputs (an array of FPADDBaseData: a,b,mid) - * ALU pipeline - * fan-out on outputs (an array of FPPackData: z,mid) - - Fan-in and Fan-out are combinatorial. 
- """ - def __init__(self, num_rows): - self.num_rows = num_rows - self.inpipe = FPADDInMuxPipe(num_rows, self.i_specfn) # fan-in - self.outpipe = FPADDMuxOutPipe(num_rows, self.o_specfn) # fan-out - - self.p = self.inpipe.p # kinda annoying, - self.n = self.outpipe.n # use pipe in/out as this class in/out - self._ports = self.inpipe.ports() + self.outpipe.ports() - - def elaborate(self, platform): - m = Module() - m.submodules.inpipe = self.inpipe - m.submodules.alu = self.alu - m.submodules.outpipe = self.outpipe - - m.d.comb += self.inpipe.n.connect_to_next(self.alu.p) - m.d.comb += self.alu.connect_to_next(self.outpipe) - - return m - - def ports(self): - return self._ports - - diff --git a/src/add/dual_add_experiment.py b/src/add/dual_add_experiment.py deleted file mode 100644 index 7ec479f5..00000000 --- a/src/add/dual_add_experiment.py +++ /dev/null @@ -1,72 +0,0 @@ -from nmigen import * -from nmigen.cli import main - -from nmigen_add_experiment import FPADD -from fpbase import FPOp - - -class Adder: - def __init__(self, width): - self.a = Signal(width) - self.b = Signal(width) - self.o = Signal(width) - - def elaborate(self, platform): - m = Module() - m.d.comb += self.o.eq(self.a + self.b) - return m - - -class Subtractor: - def __init__(self, width): - self.a = Signal(width) - self.b = Signal(width) - self.o = Signal(width) - - def elaborate(self, platform): - m = Module() - m.d.comb += self.o.eq(self.a - self.b) - return m - - -class ALU: - def __init__(self, width): - #self.op = Signal() - self.a = FPOp(width) - self.b = FPOp(width) - self.c = FPOp(width) - self.z = FPOp(width) - self.int_stb = Signal() - - self.add1 = FPADD(width) - self.add2 = FPADD(width) - - def elaborate(self, platform): - m = Module() - m.submodules.add1 = self.add1 - m.submodules.add2 = self.add2 - # join add1 a to a: add1.in_a = a - m.d.comb += self.add1.in_a.chain_from(self.a) - # join add1 b to b: add1.in_b = b - m.d.comb += self.add1.in_b.chain_from(self.b) - # join add2 a to 
c: add2.in_a = c - m.d.comb += self.add2.in_a.chain_from(self.c) - # join add2 b to add1 z: add2.in_b = add1.out_z - m.d.comb += self.add2.in_b.chain_inv(self.add1.out_z) - # join output from add2 to z: z = add2.out_z - m.d.comb += self.z.chain_from(self.add2.out_z) - # get at add1's stb signal - m.d.comb += self.int_stb.eq(self.add1.out_z.stb) - #with m.If(self.op): - # m.d.comb += self.o.eq(self.sub.o) - #with m.Else(): - # m.d.comb += self.o.eq(self.add.o) - return m - - -if __name__ == "__main__": - alu = ALU(width=16) - main(alu, ports=alu.a.ports() + \ - alu.b.ports() + \ - alu.c.ports() + \ - alu.z.ports()) diff --git a/src/add/example_buf_pipe.py b/src/add/example_buf_pipe.py deleted file mode 100644 index 4bb7cdf1..00000000 --- a/src/add/example_buf_pipe.py +++ /dev/null @@ -1,103 +0,0 @@ -""" Pipeline and BufferedHandshake examples -""" - -from nmoperator import eq -from iocontrol import (PrevControl, NextControl) -from singlepipe import (PrevControl, NextControl, ControlBase, - StageCls, Stage, StageChain, - BufferedHandshake, UnbufferedPipeline) - -from nmigen import Signal, Module -from nmigen.cli import verilog, rtlil - - -class ExampleAddStage(StageCls): - """ an example of how to use the buffered pipeline, as a class instance - """ - - def ispec(self): - """ returns a tuple of input signals which will be the incoming data - """ - return (Signal(16), Signal(16)) - - def ospec(self): - """ returns an output signal which will happen to contain the sum - of the two inputs - """ - return Signal(16) - - def process(self, i): - """ process the input data (sums the values in the tuple) and returns it - """ - return i[0] + i[1] - - -class ExampleBufPipeAdd(BufferedHandshake): - """ an example of how to use the buffered pipeline, using a class instance - """ - - def __init__(self): - addstage = ExampleAddStage() - BufferedHandshake.__init__(self, addstage) - - -class ExampleStage(Stage): - """ an example of how to use the buffered pipeline, in a static class 
- fashion - """ - - def ispec(): - return Signal(16, name="example_input_signal") - - def ospec(): - return Signal(16, name="example_output_signal") - - def process(i): - """ process the input data and returns it (adds 1) - """ - return i + 1 - - -class ExampleStageCls(StageCls): - """ an example of how to use the buffered pipeline, in a static class - fashion - """ - - def ispec(self): - return Signal(16, name="example_input_signal") - - def ospec(self): - return Signal(16, name="example_output_signal") - - def process(self, i): - """ process the input data and returns it (adds 1) - """ - return i + 1 - - -class ExampleBufPipe(BufferedHandshake): - """ an example of how to use the buffered pipeline. - """ - - def __init__(self): - BufferedHandshake.__init__(self, ExampleStage) - - -class ExamplePipeline(UnbufferedPipeline): - """ an example of how to use the unbuffered pipeline. - """ - - def __init__(self): - UnbufferedPipeline.__init__(self, ExampleStage) - - -if __name__ == '__main__': - dut = ExampleBufPipe() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_bufpipe.il", "w") as f: - f.write(vl) - - dut = ExamplePipeline() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_combpipe.il", "w") as f: - f.write(vl) diff --git a/src/add/fadd_state.py b/src/add/fadd_state.py deleted file mode 100644 index 7ad88786..00000000 --- a/src/add/fadd_state.py +++ /dev/null @@ -1,282 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Cat -from nmigen.cli import main, verilog - -from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase - -from singlepipe import eq - - -class FPADD(FPBase): - - def __init__(self, width, single_cycle=False): - FPBase.__init__(self) - self.width = width - self.single_cycle = single_cycle - - self.in_a = FPOp(width) - self.in_b = FPOp(width) - self.out_z = FPOp(width) - - def elaborate(self, platform=None): - """ creates 
the HDL code-fragment for FPAdd - """ - m = Module() - - # Latches - a = FPNumIn(self.in_a, self.width) - b = FPNumIn(self.in_b, self.width) - z = FPNumOut(self.width, False) - - m.submodules.fpnum_a = a - m.submodules.fpnum_b = b - m.submodules.fpnum_z = z - - m.d.comb += a.v.eq(self.in_a.v) - m.d.comb += b.v.eq(self.in_b.v) - - w = z.m_width + 4 - tot = Signal(w, reset_less=True) # sticky/round/guard, {mantissa} result, 1 overflow - - of = Overflow() - - m.submodules.overflow = of - - with m.FSM() as fsm: - - # ****** - # gets operand a - - with m.State("get_a"): - res = self.get_op(m, self.in_a, a, "get_b") - m.d.sync += eq([a, self.in_a.ack], res) - - # ****** - # gets operand b - - with m.State("get_b"): - res = self.get_op(m, self.in_b, b, "special_cases") - m.d.sync += eq([b, self.in_b.ack], res) - - # ****** - # special cases: NaNs, infs, zeros, denormalised - # NOTE: some of these are unique to add. see "Special Operations" - # https://steve.hollasch.net/cgindex/coding/ieeefloat.html - - with m.State("special_cases"): - - s_nomatch = Signal() - m.d.comb += s_nomatch.eq(a.s != b.s) - - m_match = Signal() - m.d.comb += m_match.eq(a.m == b.m) - - # if a is NaN or b is NaN return NaN - with m.If(a.is_nan | b.is_nan): - m.next = "put_z" - m.d.sync += z.nan(1) - - # XXX WEIRDNESS for FP16 non-canonical NaN handling - # under review - - ## if a is zero and b is NaN return -b - #with m.If(a.is_zero & (a.s==0) & b.is_nan): - # m.next = "put_z" - # m.d.sync += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0])) - - ## if b is zero and a is NaN return -a - #with m.Elif(b.is_zero & (b.s==0) & a.is_nan): - # m.next = "put_z" - # m.d.sync += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0])) - - ## if a is -zero and b is NaN return -b - #with m.Elif(a.is_zero & (a.s==1) & b.is_nan): - # m.next = "put_z" - # m.d.sync += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1)) - - ## if b is -zero and a is NaN return -a - #with m.Elif(b.is_zero & (b.s==1) & a.is_nan): - # m.next = "put_z" - # 
m.d.sync += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1)) - - # if a is inf return inf (or NaN) - with m.Elif(a.is_inf): - m.next = "put_z" - m.d.sync += z.inf(a.s) - # if a is inf and signs don't match return NaN - with m.If(b.exp_128 & s_nomatch): - m.d.sync += z.nan(1) - - # if b is inf return inf - with m.Elif(b.is_inf): - m.next = "put_z" - m.d.sync += z.inf(b.s) - - # if a is zero and b zero return signed-a/b - with m.Elif(a.is_zero & b.is_zero): - m.next = "put_z" - m.d.sync += z.create(a.s & b.s, b.e, b.m[3:-1]) - - # if a is zero return b - with m.Elif(a.is_zero): - m.next = "put_z" - m.d.sync += z.create(b.s, b.e, b.m[3:-1]) - - # if b is zero return a - with m.Elif(b.is_zero): - m.next = "put_z" - m.d.sync += z.create(a.s, a.e, a.m[3:-1]) - - # if a equal to -b return zero (+ve zero) - with m.Elif(s_nomatch & m_match & (a.e == b.e)): - m.next = "put_z" - m.d.sync += z.zero(0) - - # Denormalised Number checks - with m.Else(): - m.next = "align" - self.denormalise(m, a) - self.denormalise(m, b) - - # ****** - # align. - - with m.State("align"): - if not self.single_cycle: - # NOTE: this does *not* do single-cycle multi-shifting, - # it *STAYS* in the align state until exponents match - - # exponent of a greater than b: shift b down - with m.If(a.e > b.e): - m.d.sync += b.shift_down() - # exponent of b greater than a: shift a down - with m.Elif(a.e < b.e): - m.d.sync += a.shift_down() - # exponents equal: move to next stage. - with m.Else(): - m.next = "add_0" - else: - # This one however (single-cycle) will do the shift - # in one go. 
- - # XXX TODO: the shifter used here is quite expensive - # having only one would be better - - ediff = Signal((len(a.e), True), reset_less=True) - ediffr = Signal((len(a.e), True), reset_less=True) - m.d.comb += ediff.eq(a.e - b.e) - m.d.comb += ediffr.eq(b.e - a.e) - with m.If(ediff > 0): - m.d.sync += b.shift_down_multi(ediff) - # exponent of b greater than a: shift a down - with m.Elif(ediff < 0): - m.d.sync += a.shift_down_multi(ediffr) - - m.next = "add_0" - - # ****** - # First stage of add. covers same-sign (add) and subtract - # special-casing when mantissas are greater or equal, to - # give greatest accuracy. - - with m.State("add_0"): - m.next = "add_1" - m.d.sync += z.e.eq(a.e) - # same-sign (both negative or both positive) add mantissas - with m.If(a.s == b.s): - m.d.sync += [ - tot.eq(Cat(a.m, 0) + Cat(b.m, 0)), - z.s.eq(a.s) - ] - # a mantissa greater than b, use a - with m.Elif(a.m >= b.m): - m.d.sync += [ - tot.eq(Cat(a.m, 0) - Cat(b.m, 0)), - z.s.eq(a.s) - ] - # b mantissa greater than a, use b - with m.Else(): - m.d.sync += [ - tot.eq(Cat(b.m, 0) - Cat(a.m, 0)), - z.s.eq(b.s) - ] - - # ****** - # Second stage of add: preparation for normalisation. - # detects when tot sum is too big (tot[27] is kinda a carry bit) - - with m.State("add_1"): - m.next = "normalise_1" - # tot[27] gets set when the sum overflows. shift result down - with m.If(tot[-1]): - m.d.sync += [ - z.m.eq(tot[4:]), - of.m0.eq(tot[4]), - of.guard.eq(tot[3]), - of.round_bit.eq(tot[2]), - of.sticky.eq(tot[1] | tot[0]), - z.e.eq(z.e + 1) - ] - # tot[27] zero case - with m.Else(): - m.d.sync += [ - z.m.eq(tot[3:]), - of.m0.eq(tot[3]), - of.guard.eq(tot[2]), - of.round_bit.eq(tot[1]), - of.sticky.eq(tot[0]) - ] - - # ****** - # First stage of normalisation. - - with m.State("normalise_1"): - self.normalise_1(m, z, of, "normalise_2") - - # ****** - # Second stage of normalisation. 
- - with m.State("normalise_2"): - self.normalise_2(m, z, of, "round") - - # ****** - # rounding stage - - with m.State("round"): - self.roundz(m, z, of.roundz) - m.next = "corrections" - - # ****** - # correction stage - - with m.State("corrections"): - self.corrections(m, z, "pack") - - # ****** - # pack stage - - with m.State("pack"): - self.pack(m, z, "put_z") - - # ****** - # put_z stage - - with m.State("put_z"): - self.put_z(m, z, self.out_z, "get_a") - - return m - - -if __name__ == "__main__": - alu = FPADD(width=32) - main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) - - - # works... but don't use, just do "python fname.py convert -t v" - #print (verilog.convert(alu, ports=[ - # ports=alu.in_a.ports() + \ - # alu.in_b.ports() + \ - # alu.out_z.ports()) diff --git a/src/add/fmul.py b/src/add/fmul.py deleted file mode 100644 index a2ba41e7..00000000 --- a/src/add/fmul.py +++ /dev/null @@ -1,172 +0,0 @@ -from nmigen import Module, Signal, Cat, Mux, Array, Const -from nmigen.cli import main, verilog - -from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPState -from fpcommon.getop import FPGetOp -from singlepipe import eq - - -class FPMUL(FPBase): - - def __init__(self, width): - FPBase.__init__(self) - self.width = width - - self.in_a = FPOp(width) - self.in_b = FPOp(width) - self.out_z = FPOp(width) - - self.states = [] - - def add_state(self, state): - self.states.append(state) - return state - - def elaborate(self, platform=None): - """ creates the HDL code-fragment for FPMUL - """ - m = Module() - - # Latches - a = FPNumIn(None, self.width, False) - b = FPNumIn(None, self.width, False) - z = FPNumOut(self.width, False) - - mw = (z.m_width)*2 - 1 + 3 # sticky/round/guard bits + (2*mant) - 1 - product = Signal(mw) - - of = Overflow() - m.submodules.of = of - m.submodules.a = a - m.submodules.b = b - m.submodules.z = z - - m.d.comb += a.v.eq(self.in_a.v) - m.d.comb += b.v.eq(self.in_b.v) - - with m.FSM() as fsm: - - # 
****** - # gets operand a - - with m.State("get_a"): - res = self.get_op(m, self.in_a, a, "get_b") - m.d.sync += eq([a, self.in_a.ack], res) - - # ****** - # gets operand b - - with m.State("get_b"): - res = self.get_op(m, self.in_b, b, "special_cases") - m.d.sync += eq([b, self.in_b.ack], res) - - # ****** - # special cases - - with m.State("special_cases"): - #if a or b is NaN return NaN - with m.If(a.is_nan | b.is_nan): - m.next = "put_z" - m.d.sync += z.nan(1) - #if a is inf return inf - with m.Elif(a.is_inf): - m.next = "put_z" - m.d.sync += z.inf(a.s ^ b.s) - #if b is zero return NaN - with m.If(b.is_zero): - m.d.sync += z.nan(1) - #if b is inf return inf - with m.Elif(b.is_inf): - m.next = "put_z" - m.d.sync += z.inf(a.s ^ b.s) - #if a is zero return NaN - with m.If(a.is_zero): - m.next = "put_z" - m.d.sync += z.nan(1) - #if a is zero return zero - with m.Elif(a.is_zero): - m.next = "put_z" - m.d.sync += z.zero(a.s ^ b.s) - #if b is zero return zero - with m.Elif(b.is_zero): - m.next = "put_z" - m.d.sync += z.zero(a.s ^ b.s) - # Denormalised Number checks - with m.Else(): - m.next = "normalise_a" - self.denormalise(m, a) - self.denormalise(m, b) - - # ****** - # normalise_a - - with m.State("normalise_a"): - self.op_normalise(m, a, "normalise_b") - - # ****** - # normalise_b - - with m.State("normalise_b"): - self.op_normalise(m, b, "multiply_0") - - #multiply_0 - with m.State("multiply_0"): - m.next = "multiply_1" - m.d.sync += [ - z.s.eq(a.s ^ b.s), - z.e.eq(a.e + b.e + 1), - product.eq(a.m * b.m * 4) - ] - - #multiply_1 - with m.State("multiply_1"): - mw = z.m_width - m.next = "normalise_1" - m.d.sync += [ - z.m.eq(product[mw+2:]), - of.guard.eq(product[mw+1]), - of.round_bit.eq(product[mw]), - of.sticky.eq(product[0:mw] != 0) - ] - - # ****** - # First stage of normalisation. - with m.State("normalise_1"): - self.normalise_1(m, z, of, "normalise_2") - - # ****** - # Second stage of normalisation. 
- - with m.State("normalise_2"): - self.normalise_2(m, z, of, "round") - - # ****** - # rounding stage - - with m.State("round"): - self.roundz(m, z, of.roundz) - m.next = "corrections" - - # ****** - # correction stage - - with m.State("corrections"): - self.corrections(m, z, "pack") - - # ****** - # pack stage - with m.State("pack"): - self.pack(m, z, "put_z") - - # ****** - # put_z stage - - with m.State("put_z"): - self.put_z(m, z, self.out_z, "get_a") - - return m - - -if __name__ == "__main__": - alu = FPMUL(width=32) - main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) diff --git a/src/add/fpadd/__init__.py b/src/add/fpadd/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/add/fpadd/add0.py b/src/add/fpadd/add0.py deleted file mode 100644 index 76790fe2..00000000 --- a/src/add/fpadd/add0.py +++ /dev/null @@ -1,113 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Cat, Elaboratable -from nmigen.cli import main, verilog - -from fpbase import FPNumBase -from fpbase import FPState -from fpcommon.denorm import FPSCData - - -class FPAddStage0Data: - - def __init__(self, width, id_wid): - self.z = FPNumBase(width, False) - self.out_do_z = Signal(reset_less=True) - self.oz = Signal(width, reset_less=True) - self.tot = Signal(self.z.m_width + 4, reset_less=True) - self.mid = Signal(id_wid, reset_less=True) - - def eq(self, i): - return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz), - self.tot.eq(i.tot), self.mid.eq(i.mid)] - - -class FPAddStage0Mod(Elaboratable): - - def __init__(self, width, id_wid): - self.width = width - self.id_wid = id_wid - self.i = self.ispec() - self.o = self.ospec() - - def ispec(self): - return FPSCData(self.width, self.id_wid) - - def ospec(self): - return FPAddStage0Data(self.width, self.id_wid) - - def process(self, i): - return self.o - - def setup(self, m, i): - """ links 
module to inputs and outputs - """ - m.submodules.add0 = self - m.d.comb += self.i.eq(i) - - def elaborate(self, platform): - m = Module() - m.submodules.add0_in_a = self.i.a - m.submodules.add0_in_b = self.i.b - m.submodules.add0_out_z = self.o.z - - # store intermediate tests (and zero-extended mantissas) - seq = Signal(reset_less=True) - mge = Signal(reset_less=True) - am0 = Signal(len(self.i.a.m)+1, reset_less=True) - bm0 = Signal(len(self.i.b.m)+1, reset_less=True) - m.d.comb += [seq.eq(self.i.a.s == self.i.b.s), - mge.eq(self.i.a.m >= self.i.b.m), - am0.eq(Cat(self.i.a.m, 0)), - bm0.eq(Cat(self.i.b.m, 0)) - ] - # same-sign (both negative or both positive) add mantissas - with m.If(~self.i.out_do_z): - m.d.comb += self.o.z.e.eq(self.i.a.e) - with m.If(seq): - m.d.comb += [ - self.o.tot.eq(am0 + bm0), - self.o.z.s.eq(self.i.a.s) - ] - # a mantissa greater than b, use a - with m.Elif(mge): - m.d.comb += [ - self.o.tot.eq(am0 - bm0), - self.o.z.s.eq(self.i.a.s) - ] - # b mantissa greater than a, use b - with m.Else(): - m.d.comb += [ - self.o.tot.eq(bm0 - am0), - self.o.z.s.eq(self.i.b.s) - ] - - m.d.comb += self.o.oz.eq(self.i.oz) - m.d.comb += self.o.out_do_z.eq(self.i.out_do_z) - m.d.comb += self.o.mid.eq(self.i.mid) - return m - - -class FPAddStage0(FPState): - """ First stage of add. covers same-sign (add) and subtract - special-casing when mantissas are greater or equal, to - give greatest accuracy. 
- """ - - def __init__(self, width, id_wid): - FPState.__init__(self, "add_0") - self.mod = FPAddStage0Mod(width) - self.o = self.mod.ospec() - - def setup(self, m, i): - """ links module to inputs and outputs - """ - self.mod.setup(m, i) - - # NOTE: these could be done as combinatorial (merge add0+add1) - m.d.sync += self.o.eq(self.mod.o) - - def action(self, m): - m.next = "add_1" diff --git a/src/add/fpadd/add1.py b/src/add/fpadd/add1.py deleted file mode 100644 index 679f5176..00000000 --- a/src/add/fpadd/add1.py +++ /dev/null @@ -1,95 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Elaboratable -from nmigen.cli import main, verilog -from math import log - -from fpbase import FPState -from fpcommon.postcalc import FPAddStage1Data -from fpadd.add0 import FPAddStage0Data - - -class FPAddStage1Mod(FPState, Elaboratable): - """ Second stage of add: preparation for normalisation. - detects when tot sum is too big (tot[27] is kinda a carry bit) - """ - - def __init__(self, width, id_wid): - self.width = width - self.id_wid = id_wid - self.i = self.ispec() - self.o = self.ospec() - - def ispec(self): - return FPAddStage0Data(self.width, self.id_wid) - - def ospec(self): - return FPAddStage1Data(self.width, self.id_wid) - - def process(self, i): - return self.o - - def setup(self, m, i): - """ links module to inputs and outputs - """ - m.submodules.add1 = self - m.submodules.add1_out_overflow = self.o.of - - m.d.comb += self.i.eq(i) - - def elaborate(self, platform): - m = Module() - m.d.comb += self.o.z.eq(self.i.z) - # tot[-1] (MSB) gets set when the sum overflows. 
shift result down - with m.If(~self.i.out_do_z): - with m.If(self.i.tot[-1]): - m.d.comb += [ - self.o.z.m.eq(self.i.tot[4:]), - self.o.of.m0.eq(self.i.tot[4]), - self.o.of.guard.eq(self.i.tot[3]), - self.o.of.round_bit.eq(self.i.tot[2]), - self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]), - self.o.z.e.eq(self.i.z.e + 1) - ] - # tot[-1] (MSB) zero case - with m.Else(): - m.d.comb += [ - self.o.z.m.eq(self.i.tot[3:]), - self.o.of.m0.eq(self.i.tot[3]), - self.o.of.guard.eq(self.i.tot[2]), - self.o.of.round_bit.eq(self.i.tot[1]), - self.o.of.sticky.eq(self.i.tot[0]) - ] - - m.d.comb += self.o.out_do_z.eq(self.i.out_do_z) - m.d.comb += self.o.oz.eq(self.i.oz) - m.d.comb += self.o.mid.eq(self.i.mid) - - return m - - -class FPAddStage1(FPState): - - def __init__(self, width, id_wid): - FPState.__init__(self, "add_1") - self.mod = FPAddStage1Mod(width) - self.out_z = FPNumBase(width, False) - self.out_of = Overflow() - self.norm_stb = Signal() - - def setup(self, m, i): - """ links module to inputs and outputs - """ - self.mod.setup(m, i) - - m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state - - m.d.sync += self.out_of.eq(self.mod.out_of) - m.d.sync += self.out_z.eq(self.mod.out_z) - m.d.sync += self.norm_stb.eq(1) - - def action(self, m): - m.next = "normalise_1" - diff --git a/src/add/fpadd/addstages.py b/src/add/fpadd/addstages.py deleted file mode 100644 index f5703aec..00000000 --- a/src/add/fpadd/addstages.py +++ /dev/null @@ -1,55 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module -from nmigen.cli import main, verilog - -from singlepipe import (StageChain, SimpleHandshake, - PassThroughStage) - -from fpbase import FPState -from fpcommon.denorm import FPSCData -from fpcommon.postcalc import FPAddStage1Data -from fpadd.align import FPAddAlignSingleMod -from fpadd.add0 import FPAddStage0Mod -from fpadd.add1 import FPAddStage1Mod - - -class 
FPAddAlignSingleAdd(FPState, SimpleHandshake): - - def __init__(self, width, id_wid): - FPState.__init__(self, "align") - self.width = width - self.id_wid = id_wid - SimpleHandshake.__init__(self, self) # pipeline is its own stage - self.a1o = self.ospec() - - def ispec(self): - return FPSCData(self.width, self.id_wid) - - def ospec(self): - return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec - - def setup(self, m, i): - """ links module to inputs and outputs - """ - - # chain AddAlignSingle, AddStage0 and AddStage1 - mod = FPAddAlignSingleMod(self.width, self.id_wid) - a0mod = FPAddStage0Mod(self.width, self.id_wid) - a1mod = FPAddStage1Mod(self.width, self.id_wid) - - chain = StageChain([mod, a0mod, a1mod]) - chain.setup(m, i) - - self.o = a1mod.o - - def process(self, i): - return self.o - - def action(self, m): - m.d.sync += self.a1o.eq(self.process(None)) - m.next = "normalise_1" - - diff --git a/src/add/fpadd/align.py b/src/add/fpadd/align.py deleted file mode 100644 index 9837a0b8..00000000 --- a/src/add/fpadd/align.py +++ /dev/null @@ -1,211 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal -from nmigen.cli import main, verilog - -from fpbase import FPNumOut, FPNumIn, FPNumBase -from fpbase import MultiShiftRMerge -from fpbase import FPState -from fpcommon.denorm import FPSCData - - -class FPNumIn2Ops: - - def __init__(self, width, id_wid): - self.a = FPNumIn(None, width) - self.b = FPNumIn(None, width) - self.z = FPNumOut(width, False) - self.out_do_z = Signal(reset_less=True) - self.oz = Signal(width, reset_less=True) - self.mid = Signal(id_wid, reset_less=True) - - def eq(self, i): - return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz), - self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)] - - - -class FPAddAlignMultiMod(FPState): - - def __init__(self, width): - self.in_a = FPNumBase(width) - self.in_b = FPNumBase(width) - 
self.out_a = FPNumIn(None, width) - self.out_b = FPNumIn(None, width) - self.exp_eq = Signal(reset_less=True) - - def elaborate(self, platform): - # This one however (single-cycle) will do the shift - # in one go. - - m = Module() - - m.submodules.align_in_a = self.in_a - m.submodules.align_in_b = self.in_b - m.submodules.align_out_a = self.out_a - m.submodules.align_out_b = self.out_b - - # NOTE: this does *not* do single-cycle multi-shifting, - # it *STAYS* in the align state until exponents match - - # exponent of a greater than b: shift b down - m.d.comb += self.exp_eq.eq(0) - m.d.comb += self.out_a.eq(self.in_a) - m.d.comb += self.out_b.eq(self.in_b) - agtb = Signal(reset_less=True) - altb = Signal(reset_less=True) - m.d.comb += agtb.eq(self.in_a.e > self.in_b.e) - m.d.comb += altb.eq(self.in_a.e < self.in_b.e) - with m.If(agtb): - m.d.comb += self.out_b.shift_down(self.in_b) - # exponent of b greater than a: shift a down - with m.Elif(altb): - m.d.comb += self.out_a.shift_down(self.in_a) - # exponents equal: move to next stage. 
- with m.Else(): - m.d.comb += self.exp_eq.eq(1) - return m - - -class FPAddAlignMulti(FPState): - - def __init__(self, width, id_wid): - FPState.__init__(self, "align") - self.mod = FPAddAlignMultiMod(width) - self.out_a = FPNumIn(None, width) - self.out_b = FPNumIn(None, width) - self.exp_eq = Signal(reset_less=True) - - def setup(self, m, in_a, in_b): - """ links module to inputs and outputs - """ - m.submodules.align = self.mod - m.d.comb += self.mod.in_a.eq(in_a) - m.d.comb += self.mod.in_b.eq(in_b) - m.d.comb += self.exp_eq.eq(self.mod.exp_eq) - m.d.sync += self.out_a.eq(self.mod.out_a) - m.d.sync += self.out_b.eq(self.mod.out_b) - - def action(self, m): - with m.If(self.exp_eq): - m.next = "add_0" - - -class FPAddAlignSingleMod: - - def __init__(self, width, id_wid): - self.width = width - self.id_wid = id_wid - self.i = self.ispec() - self.o = self.ospec() - - def ispec(self): - return FPSCData(self.width, self.id_wid) - - def ospec(self): - return FPNumIn2Ops(self.width, self.id_wid) - - def process(self, i): - return self.o - - def setup(self, m, i): - """ links module to inputs and outputs - """ - m.submodules.align = self - m.d.comb += self.i.eq(i) - - def elaborate(self, platform): - """ Aligns A against B or B against A, depending on which has the - greater exponent. This is done in a *single* cycle using - variable-width bit-shift - - the shifter used here is quite expensive in terms of gates. 
- Mux A or B in (and out) into temporaries, as only one of them - needs to be aligned against the other - """ - m = Module() - - m.submodules.align_in_a = self.i.a - m.submodules.align_in_b = self.i.b - m.submodules.align_out_a = self.o.a - m.submodules.align_out_b = self.o.b - - # temporary (muxed) input and output to be shifted - t_inp = FPNumBase(self.width) - t_out = FPNumIn(None, self.width) - espec = (len(self.i.a.e), True) - msr = MultiShiftRMerge(self.i.a.m_width, espec) - m.submodules.align_t_in = t_inp - m.submodules.align_t_out = t_out - m.submodules.multishift_r = msr - - ediff = Signal(espec, reset_less=True) - ediffr = Signal(espec, reset_less=True) - tdiff = Signal(espec, reset_less=True) - elz = Signal(reset_less=True) - egz = Signal(reset_less=True) - - # connect multi-shifter to t_inp/out mantissa (and tdiff) - m.d.comb += msr.inp.eq(t_inp.m) - m.d.comb += msr.diff.eq(tdiff) - m.d.comb += t_out.m.eq(msr.m) - m.d.comb += t_out.e.eq(t_inp.e + tdiff) - m.d.comb += t_out.s.eq(t_inp.s) - - m.d.comb += ediff.eq(self.i.a.e - self.i.b.e) - m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e) - m.d.comb += elz.eq(self.i.a.e < self.i.b.e) - m.d.comb += egz.eq(self.i.a.e > self.i.b.e) - - # default: A-exp == B-exp, A and B untouched (fall through) - m.d.comb += self.o.a.eq(self.i.a) - m.d.comb += self.o.b.eq(self.i.b) - # only one shifter (muxed) - #m.d.comb += t_out.shift_down_multi(tdiff, t_inp) - # exponent of a greater than b: shift b down - with m.If(~self.i.out_do_z): - with m.If(egz): - m.d.comb += [t_inp.eq(self.i.b), - tdiff.eq(ediff), - self.o.b.eq(t_out), - self.o.b.s.eq(self.i.b.s), # whoops forgot sign - ] - # exponent of b greater than a: shift a down - with m.Elif(elz): - m.d.comb += [t_inp.eq(self.i.a), - tdiff.eq(ediffr), - self.o.a.eq(t_out), - self.o.a.s.eq(self.i.a.s), # whoops forgot sign - ] - - m.d.comb += self.o.mid.eq(self.i.mid) - m.d.comb += self.o.z.eq(self.i.z) - m.d.comb += self.o.out_do_z.eq(self.i.out_do_z) - m.d.comb += 
self.o.oz.eq(self.i.oz) - - return m - - -class FPAddAlignSingle(FPState): - - def __init__(self, width, id_wid): - FPState.__init__(self, "align") - self.mod = FPAddAlignSingleMod(width, id_wid) - self.out_a = FPNumIn(None, width) - self.out_b = FPNumIn(None, width) - - def setup(self, m, i): - """ links module to inputs and outputs - """ - self.mod.setup(m, i) - - # NOTE: could be done as comb - m.d.sync += self.out_a.eq(self.mod.out_a) - m.d.sync += self.out_b.eq(self.mod.out_b) - - def action(self, m): - m.next = "add_0" - - diff --git a/src/add/fpadd/pipeline.py b/src/add/fpadd/pipeline.py deleted file mode 100644 index e244ee60..00000000 --- a/src/add/fpadd/pipeline.py +++ /dev/null @@ -1,59 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module -from nmigen.cli import main, verilog - -from singlepipe import (ControlBase, SimpleHandshake, PassThroughStage) -from multipipe import CombMuxOutPipe -from multipipe import PriorityCombMuxInPipe - -from fpcommon.getop import FPADDBaseData -from fpcommon.denorm import FPSCData -from fpcommon.pack import FPPackData -from fpcommon.normtopack import FPNormToPack -from fpadd.specialcases import FPAddSpecialCasesDeNorm -from fpadd.addstages import FPAddAlignSingleAdd - -from concurrentunit import ReservationStations, num_bits - - -class FPADDBasePipe(ControlBase): - def __init__(self, width, id_wid): - ControlBase.__init__(self) - self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid) - self.pipe2 = FPAddAlignSingleAdd(width, id_wid) - self.pipe3 = FPNormToPack(width, id_wid) - - self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3]) - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - m.submodules.scnorm = self.pipe1 - m.submodules.addalign = self.pipe2 - m.submodules.normpack = self.pipe3 - m.d.comb += self._eqs - return m - - -class FPADDMuxInOut(ReservationStations): - """ Reservation-Station version of 
FPADD pipeline. - - * fan-in on inputs (an array of FPADDBaseData: a,b,mid) - * 3-stage adder pipeline - * fan-out on outputs (an array of FPPackData: z,mid) - - Fan-in and Fan-out are combinatorial. - """ - def __init__(self, width, num_rows): - self.width = width - self.id_wid = num_bits(width) - self.alu = FPADDBasePipe(width, self.id_wid) - ReservationStations.__init__(self, num_rows) - - def i_specfn(self): - return FPADDBaseData(self.width, self.id_wid) - - def o_specfn(self): - return FPPackData(self.width, self.id_wid) diff --git a/src/add/fpadd/specialcases.py b/src/add/fpadd/specialcases.py deleted file mode 100644 index 6f9d1a08..00000000 --- a/src/add/fpadd/specialcases.py +++ /dev/null @@ -1,223 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Cat, Const -from nmigen.cli import main, verilog -from math import log - -from fpbase import FPNumDecode -from singlepipe import SimpleHandshake, StageChain - -from fpbase import FPState, FPID -from fpcommon.getop import FPADDBaseData -from fpcommon.denorm import (FPSCData, FPAddDeNormMod) - - -class FPAddSpecialCasesMod: - """ special cases: NaNs, infs, zeros, denormalised - NOTE: some of these are unique to add. 
see "Special Operations" - https://steve.hollasch.net/cgindex/coding/ieeefloat.html - """ - - def __init__(self, width, id_wid): - self.width = width - self.id_wid = id_wid - self.i = self.ispec() - self.o = self.ospec() - - def ispec(self): - return FPADDBaseData(self.width, self.id_wid) - - def ospec(self): - return FPSCData(self.width, self.id_wid) - - def setup(self, m, i): - """ links module to inputs and outputs - """ - m.submodules.specialcases = self - m.d.comb += self.i.eq(i) - - def process(self, i): - return self.o - - def elaborate(self, platform): - m = Module() - - m.submodules.sc_out_z = self.o.z - - # decode: XXX really should move to separate stage - a1 = FPNumDecode(None, self.width) - b1 = FPNumDecode(None, self.width) - m.submodules.sc_decode_a = a1 - m.submodules.sc_decode_b = b1 - m.d.comb += [a1.v.eq(self.i.a), - b1.v.eq(self.i.b), - self.o.a.eq(a1), - self.o.b.eq(b1) - ] - - s_nomatch = Signal(reset_less=True) - m.d.comb += s_nomatch.eq(a1.s != b1.s) - - m_match = Signal(reset_less=True) - m.d.comb += m_match.eq(a1.m == b1.m) - - e_match = Signal(reset_less=True) - m.d.comb += e_match.eq(a1.e == b1.e) - - aeqmb = Signal(reset_less=True) - m.d.comb += aeqmb.eq(s_nomatch & m_match & e_match) - - abz = Signal(reset_less=True) - m.d.comb += abz.eq(a1.is_zero & b1.is_zero) - - abnan = Signal(reset_less=True) - m.d.comb += abnan.eq(a1.is_nan | b1.is_nan) - - bexp128s = Signal(reset_less=True) - m.d.comb += bexp128s.eq(b1.exp_128 & s_nomatch) - - # if a is NaN or b is NaN return NaN - with m.If(abnan): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.nan(0) - - # XXX WEIRDNESS for FP16 non-canonical NaN handling - # under review - - ## if a is zero and b is NaN return -b - #with m.If(a.is_zero & (a.s==0) & b.is_nan): - # m.d.comb += self.o.out_do_z.eq(1) - # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0])) - - ## if b is zero and a is NaN return -a - #with m.Elif(b.is_zero & (b.s==0) & a.is_nan): - # m.d.comb += 
self.o.out_do_z.eq(1) - # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0])) - - ## if a is -zero and b is NaN return -b - #with m.Elif(a.is_zero & (a.s==1) & b.is_nan): - # m.d.comb += self.o.out_do_z.eq(1) - # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1)) - - ## if b is -zero and a is NaN return -a - #with m.Elif(b.is_zero & (b.s==1) & a.is_nan): - # m.d.comb += self.o.out_do_z.eq(1) - # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1)) - - # if a is inf return inf (or NaN) - with m.Elif(a1.is_inf): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.inf(a1.s) - # if a is inf and signs don't match return NaN - with m.If(bexp128s): - m.d.comb += self.o.z.nan(0) - - # if b is inf return inf - with m.Elif(b1.is_inf): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.inf(b1.s) - - # if a is zero and b zero return signed-a/b - with m.Elif(abz): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1]) - - # if a is zero return b - with m.Elif(a1.is_zero): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1]) - - # if b is zero return a - with m.Elif(b1.is_zero): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1]) - - # if a equal to -b return zero (+ve zero) - with m.Elif(aeqmb): - m.d.comb += self.o.out_do_z.eq(1) - m.d.comb += self.o.z.zero(0) - - # Denormalised Number checks next, so pass a/b data through - with m.Else(): - m.d.comb += self.o.out_do_z.eq(0) - - m.d.comb += self.o.oz.eq(self.o.z.v) - m.d.comb += self.o.mid.eq(self.i.mid) - - return m - - -class FPAddSpecialCases(FPState): - """ special cases: NaNs, infs, zeros, denormalised - NOTE: some of these are unique to add. 
see "Special Operations" - https://steve.hollasch.net/cgindex/coding/ieeefloat.html - """ - - def __init__(self, width, id_wid): - FPState.__init__(self, "special_cases") - self.mod = FPAddSpecialCasesMod(width) - self.out_z = self.mod.ospec() - self.out_do_z = Signal(reset_less=True) - - def setup(self, m, i): - """ links module to inputs and outputs - """ - self.mod.setup(m, i, self.out_do_z) - m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output - m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid) - - def action(self, m): - self.idsync(m) - with m.If(self.out_do_z): - m.next = "put_z" - with m.Else(): - m.next = "denormalise" - - -class FPAddSpecialCasesDeNorm(FPState, SimpleHandshake): - """ special cases: NaNs, infs, zeros, denormalised - NOTE: some of these are unique to add. see "Special Operations" - https://steve.hollasch.net/cgindex/coding/ieeefloat.html - """ - - def __init__(self, width, id_wid): - FPState.__init__(self, "special_cases") - self.width = width - self.id_wid = id_wid - SimpleHandshake.__init__(self, self) # pipe is its own stage - self.out = self.ospec() - - def ispec(self): - return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec - - def ospec(self): - return FPSCData(self.width, self.id_wid) # DeNorm ospec - - def setup(self, m, i): - """ links module to inputs and outputs - """ - smod = FPAddSpecialCasesMod(self.width, self.id_wid) - dmod = FPAddDeNormMod(self.width, self.id_wid) - - chain = StageChain([smod, dmod]) - chain.setup(m, i) - - # only needed for break-out (early-out) - # self.out_do_z = smod.o.out_do_z - - self.o = dmod.o - - def process(self, i): - return self.o - - def action(self, m): - # for break-out (early-out) - #with m.If(self.out_do_z): - # m.next = "put_z" - #with m.Else(): - m.d.sync += self.out.eq(self.process(None)) - m.next = "align" - - diff --git a/src/add/fpadd/statemachine.py b/src/add/fpadd/statemachine.py deleted file mode 100644 index 4418b3fa..00000000 --- 
a/src/add/fpadd/statemachine.py +++ /dev/null @@ -1,376 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Cat, Mux, Array, Const -from nmigen.cli import main, verilog -from math import log - -from fpbase import FPOpIn, FPOpOut -from fpbase import Trigger -from singlepipe import (StageChain, SimpleHandshake) - -from fpbase import FPState, FPID -from fpcommon.getop import (FPGetOp, FPADDBaseData, FPGet2Op) -from fpcommon.denorm import (FPSCData, FPAddDeNorm) -from fpcommon.postcalc import FPAddStage1Data -from fpcommon.postnormalise import (FPNorm1Data, - FPNorm1Single, FPNorm1Multi) -from fpcommon.roundz import (FPRoundData, FPRound) -from fpcommon.corrections import FPCorrections -from fpcommon.pack import (FPPackData, FPPackMod, FPPack) -from fpcommon.normtopack import FPNormToPack -from fpcommon.putz import (FPPutZ, FPPutZIdx) - -from fpadd.specialcases import (FPAddSpecialCases, FPAddSpecialCasesDeNorm) -from fpadd.align import (FPAddAlignMulti, FPAddAlignSingle) -from fpadd.add0 import (FPAddStage0Data, FPAddStage0) -from fpadd.add1 import (FPAddStage1Mod, FPAddStage1) -from fpadd.addstages import FPAddAlignSingleAdd - - -class FPOpData: - def __init__(self, width, id_wid): - self.z = FPOpOut(width) - self.z.data_o = Signal(width) - self.mid = Signal(id_wid, reset_less=True) - - def __iter__(self): - yield self.z - yield self.mid - - def eq(self, i): - return [self.z.eq(i.z), self.mid.eq(i.mid)] - - def ports(self): - return list(self) - - -class FPADDBaseMod: - - def __init__(self, width, id_wid=None, single_cycle=False, compact=True): - """ IEEE754 FP Add - - * width: bit-width of IEEE754. 
supported: 16, 32, 64 - * id_wid: an identifier that is sync-connected to the input - * single_cycle: True indicates each stage to complete in 1 clock - * compact: True indicates a reduced number of stages - """ - self.width = width - self.id_wid = id_wid - self.single_cycle = single_cycle - self.compact = compact - - self.in_t = Trigger() - self.i = self.ispec() - self.o = self.ospec() - - self.states = [] - - def ispec(self): - return FPADDBaseData(self.width, self.id_wid) - - def ospec(self): - return FPOpData(self.width, self.id_wid) - - def add_state(self, state): - self.states.append(state) - return state - - def elaborate(self, platform=None): - """ creates the HDL code-fragment for FPAdd - """ - m = Module() - m.submodules.out_z = self.o.z - m.submodules.in_t = self.in_t - if self.compact: - self.get_compact_fragment(m, platform) - else: - self.get_longer_fragment(m, platform) - - with m.FSM() as fsm: - - for state in self.states: - with m.State(state.state_from): - state.action(m) - - return m - - def get_longer_fragment(self, m, platform=None): - - get = self.add_state(FPGet2Op("get_ops", "special_cases", - self.width)) - get.setup(m, self.i) - a = get.out_op1 - b = get.out_op2 - get.trigger_setup(m, self.in_t.stb, self.in_t.ack) - - sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid)) - sc.setup(m, a, b, self.in_mid) - - dn = self.add_state(FPAddDeNorm(self.width, self.id_wid)) - dn.setup(m, a, b, sc.in_mid) - - if self.single_cycle: - alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid)) - alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) - else: - alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid)) - alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) - - add0 = self.add_state(FPAddStage0(self.width, self.id_wid)) - add0.setup(m, alm.out_a, alm.out_b, alm.in_mid) - - add1 = self.add_state(FPAddStage1(self.width, self.id_wid)) - add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid) - - if self.single_cycle: - n1 = 
self.add_state(FPNorm1Single(self.width, self.id_wid)) - n1.setup(m, add1.out_z, add1.out_of, add0.in_mid) - else: - n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid)) - n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid) - - rn = self.add_state(FPRound(self.width, self.id_wid)) - rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid) - - cor = self.add_state(FPCorrections(self.width, self.id_wid)) - cor.setup(m, rn.out_z, rn.in_mid) - - pa = self.add_state(FPPack(self.width, self.id_wid)) - pa.setup(m, cor.out_z, rn.in_mid) - - ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z, - pa.in_mid, self.out_mid)) - - pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z, - pa.in_mid, self.out_mid)) - - def get_compact_fragment(self, m, platform=None): - - get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid) - sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid) - alm = FPAddAlignSingleAdd(self.width, self.id_wid) - n1 = FPNormToPack(self.width, self.id_wid) - - get.trigger_setup(m, self.in_t.stb, self.in_t.ack) - - chainlist = [get, sc, alm, n1] - chain = StageChain(chainlist, specallocate=True) - chain.setup(m, self.i) - - for mod in chainlist: - sc = self.add_state(mod) - - ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o, - n1.out_z.mid, self.o.mid)) - - #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o, - # sc.o.mid, self.o.mid)) - - -class FPADDBase(FPState): - - def __init__(self, width, id_wid=None, single_cycle=False): - """ IEEE754 FP Add - - * width: bit-width of IEEE754. 
supported: 16, 32, 64 - * id_wid: an identifier that is sync-connected to the input - * single_cycle: True indicates each stage to complete in 1 clock - """ - FPState.__init__(self, "fpadd") - self.width = width - self.single_cycle = single_cycle - self.mod = FPADDBaseMod(width, id_wid, single_cycle) - self.o = self.ospec() - - self.in_t = Trigger() - self.i = self.ispec() - - self.z_done = Signal(reset_less=True) # connects to out_z Strobe - self.in_accept = Signal(reset_less=True) - self.add_stb = Signal(reset_less=True) - self.add_ack = Signal(reset=0, reset_less=True) - - def ispec(self): - return self.mod.ispec() - - def ospec(self): - return self.mod.ospec() - - def setup(self, m, i, add_stb, in_mid): - m.d.comb += [self.i.eq(i), - self.mod.i.eq(self.i), - self.z_done.eq(self.mod.o.z.trigger), - #self.add_stb.eq(add_stb), - self.mod.in_t.stb.eq(self.in_t.stb), - self.in_t.ack.eq(self.mod.in_t.ack), - self.o.mid.eq(self.mod.o.mid), - self.o.z.v.eq(self.mod.o.z.v), - self.o.z.valid_o.eq(self.mod.o.z.valid_o), - self.mod.o.z.ready_i.eq(self.o.z.ready_i_test), - ] - - m.d.sync += self.add_stb.eq(add_stb) - m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state - m.d.sync += self.o.z.ready_i.eq(0) # likewise - #m.d.sync += self.in_t.stb.eq(0) - - m.submodules.fpadd = self.mod - - def action(self, m): - - # in_accept is set on incoming strobe HIGH and ack LOW. - m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb)) - - #with m.If(self.in_t.ack): - # m.d.sync += self.in_t.stb.eq(0) - with m.If(~self.z_done): - # not done: test for accepting an incoming operand pair - with m.If(self.in_accept): - m.d.sync += [ - self.add_ack.eq(1), # acknowledge receipt... 
- self.in_t.stb.eq(1), # initiate add - ] - with m.Else(): - m.d.sync += [self.add_ack.eq(0), - self.in_t.stb.eq(0), - self.o.z.ready_i.eq(1), - ] - with m.Else(): - # done: acknowledge, and write out id and value - m.d.sync += [self.add_ack.eq(1), - self.in_t.stb.eq(0) - ] - m.next = "put_z" - - return - - if self.in_mid is not None: - m.d.sync += self.out_mid.eq(self.mod.out_mid) - - m.d.sync += [ - self.out_z.v.eq(self.mod.out_z.v) - ] - # move to output state on detecting z ack - with m.If(self.out_z.trigger): - m.d.sync += self.out_z.stb.eq(0) - m.next = "put_z" - with m.Else(): - m.d.sync += self.out_z.stb.eq(1) - - -class FPADD(FPID): - """ FPADD: stages as follows: - - FPGetOp (a) - | - FPGetOp (b) - | - FPAddBase---> FPAddBaseMod - | | - PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ - - FPAddBase is tricky: it is both a stage and *has* stages. - Connection to FPAddBaseMod therefore requires an in stb/ack - and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp - needs to be the thing that raises the incoming stb. - """ - - def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2): - """ IEEE754 FP Add - - * width: bit-width of IEEE754. 
supported: 16, 32, 64 - * id_wid: an identifier that is sync-connected to the input - * single_cycle: True indicates each stage to complete in 1 clock - """ - self.width = width - self.id_wid = id_wid - self.single_cycle = single_cycle - - #self.out_z = FPOp(width) - self.ids = FPID(id_wid) - - rs = [] - for i in range(rs_sz): - in_a = FPOpIn(width) - in_b = FPOpIn(width) - in_a.data_i = Signal(width) - in_b.data_i = Signal(width) - in_a.name = "in_a_%d" % i - in_b.name = "in_b_%d" % i - rs.append((in_a, in_b)) - self.rs = Array(rs) - - res = [] - for i in range(rs_sz): - out_z = FPOpOut(width) - out_z.data_o = Signal(width) - out_z.name = "out_z_%d" % i - res.append(out_z) - self.res = Array(res) - - self.states = [] - - def add_state(self, state): - self.states.append(state) - return state - - def elaborate(self, platform=None): - """ creates the HDL code-fragment for FPAdd - """ - m = Module() - #m.submodules += self.rs - - in_a = self.rs[0][0] - in_b = self.rs[0][1] - - geta = self.add_state(FPGetOp("get_a", "get_b", - in_a, self.width)) - geta.setup(m, in_a) - a = geta.out_op - - getb = self.add_state(FPGetOp("get_b", "fpadd", - in_b, self.width)) - getb.setup(m, in_b) - b = getb.out_op - - ab = FPADDBase(self.width, self.id_wid, self.single_cycle) - ab = self.add_state(ab) - abd = ab.ispec() # create an input spec object for FPADDBase - m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)] - ab.setup(m, abd, getb.out_decode, self.ids.in_mid) - o = ab.o - - pz = self.add_state(FPPutZIdx("put_z", o.z, self.res, - o.mid, "get_a")) - - with m.FSM() as fsm: - - for state in self.states: - with m.State(state.state_from): - state.action(m) - - return m - - -if __name__ == "__main__": - if True: - alu = FPADD(width=32, id_wid=5, single_cycle=True) - main(alu, ports=alu.rs[0][0].ports() + \ - alu.rs[0][1].ports() + \ - alu.res[0].ports() + \ - [alu.ids.in_mid, alu.ids.out_mid]) - else: - alu = FPADDBase(width=32, id_wid=5, single_cycle=True) - main(alu, 
ports=[alu.in_a, alu.in_b] + \ - alu.in_t.ports() + \ - alu.out_z.ports() + \ - [alu.in_mid, alu.out_mid]) - - - # works... but don't use, just do "python fname.py convert -t v" - #print (verilog.convert(alu, ports=[ - # ports=alu.in_a.ports() + \ - # alu.in_b.ports() + \ - # alu.out_z.ports()) diff --git a/src/add/fpbase.py b/src/add/fpbase.py deleted file mode 100644 index f4908592..00000000 --- a/src/add/fpbase.py +++ /dev/null @@ -1,733 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable -from math import log -from operator import or_ -from functools import reduce - -from singlepipe import PrevControl, NextControl -from pipeline import ObjectProxy - - -class MultiShiftR: - - def __init__(self, width): - self.width = width - self.smax = int(log(width) / log(2)) - self.i = Signal(width, reset_less=True) - self.s = Signal(self.smax, reset_less=True) - self.o = Signal(width, reset_less=True) - - def elaborate(self, platform): - m = Module() - m.d.comb += self.o.eq(self.i >> self.s) - return m - - -class MultiShift: - """ Generates variable-length single-cycle shifter from a series - of conditional tests on each bit of the left/right shift operand. - Each bit tested produces output shifted by that number of bits, - in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set - shifts by 2 bits, each partial result cascading to the next Mux. - - Could be adapted to do arithmetic shift by taking copies of the - MSB instead of zeros. 
- """ - - def __init__(self, width): - self.width = width - self.smax = int(log(width) / log(2)) - - def lshift(self, op, s): - res = op << s - return res[:len(op)] - res = op - for i in range(self.smax): - zeros = [0] * (1<> s - return res[:len(op)] - res = op - for i in range(self.smax): - zeros = [0] * (1< 0) - m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0) - m.d.comb += self.exp_gt127.eq(self.e > self.P127) - m.d.comb += self.exp_n127.eq(self.e == self.N127) - m.d.comb += self.exp_n126.eq(self.e == self.N126) - m.d.comb += self.m_zero.eq(self.m == self.mzero) - m.d.comb += self.m_msbzero.eq(self.m[self.e_start] == 0) - - return m - - def _is_nan(self): - return (self.exp_128) & (~self.m_zero) - - def _is_inf(self): - return (self.exp_128) & (self.m_zero) - - def _is_zero(self): - return (self.exp_n127) & (self.m_zero) - - def _is_overflowed(self): - return self.exp_gt127 - - def _is_denormalised(self): - return (self.exp_n126) & (self.m_msbzero) - - def __iter__(self): - yield self.s - yield self.e - yield self.m - - def eq(self, inp): - return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)] - - -class FPNumOut(FPNumBase): - """ Floating-point Number Class - - Contains signals for an incoming copy of the value, decoded into - sign / exponent / mantissa. - Also contains encoding functions, creation and recognition of - zero, NaN and inf (all signed) - - Four extra bits are included in the mantissa: the top bit - (m[-1]) is effectively a carry-overflow. 
The other three are - guard (m[2]), round (m[1]), and sticky (m[0]) - """ - def __init__(self, width, m_extra=True): - FPNumBase.__init__(self, width, m_extra) - - def elaborate(self, platform): - m = FPNumBase.elaborate(self, platform) - - return m - - def create(self, s, e, m): - """ creates a value from sign / exponent / mantissa - - bias is added here, to the exponent - """ - return [ - self.v[-1].eq(s), # sign - self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias) - self.v[0:self.e_start].eq(m) # mantissa - ] - - def nan(self, s): - return self.create(s, self.P128, 1<<(self.e_start-1)) - - def inf(self, s): - return self.create(s, self.P128, 0) - - def zero(self, s): - return self.create(s, self.N127, 0) - - def create2(self, s, e, m): - """ creates a value from sign / exponent / mantissa - - bias is added here, to the exponent - """ - e = e + self.P127 # exp (add on bias) - return Cat(m[0:self.e_start], - e[0:self.e_end-self.e_start], - s) - - def nan2(self, s): - return self.create2(s, self.P128, self.msb1) - - def inf2(self, s): - return self.create2(s, self.P128, self.mzero) - - def zero2(self, s): - return self.create2(s, self.N127, self.mzero) - - -class MultiShiftRMerge(Elaboratable): - """ shifts down (right) and merges lower bits into m[0]. 
- m[0] is the "sticky" bit, basically - """ - def __init__(self, width, s_max=None): - if s_max is None: - s_max = int(log(width) / log(2)) - self.smax = s_max - self.m = Signal(width, reset_less=True) - self.inp = Signal(width, reset_less=True) - self.diff = Signal(s_max, reset_less=True) - self.width = width - - def elaborate(self, platform): - m = Module() - - rs = Signal(self.width, reset_less=True) - m_mask = Signal(self.width, reset_less=True) - smask = Signal(self.width, reset_less=True) - stickybit = Signal(reset_less=True) - maxslen = Signal(self.smax, reset_less=True) - maxsleni = Signal(self.smax, reset_less=True) - - sm = MultiShift(self.width-1) - m0s = Const(0, self.width-1) - mw = Const(self.width-1, len(self.diff)) - m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)), - maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)), - ] - - m.d.comb += [ - # shift mantissa by maxslen, mask by inverse - rs.eq(sm.rshift(self.inp[1:], maxslen)), - m_mask.eq(sm.rshift(~m0s, maxsleni)), - smask.eq(self.inp[1:] & m_mask), - # sticky bit combines all mask (and mantissa low bit) - stickybit.eq(smask.bool() | self.inp[0]), - # mantissa result contains m[0] already. - self.m.eq(Cat(stickybit, rs)) - ] - return m - - -class FPNumShift(FPNumBase, Elaboratable): - """ Floating-point Number Class for shifting - """ - def __init__(self, mainm, op, inv, width, m_extra=True): - FPNumBase.__init__(self, width, m_extra) - self.latch_in = Signal() - self.mainm = mainm - self.inv = inv - self.op = op - - def elaborate(self, platform): - m = FPNumBase.elaborate(self, platform) - - m.d.comb += self.s.eq(op.s) - m.d.comb += self.e.eq(op.e) - m.d.comb += self.m.eq(op.m) - - with self.mainm.State("align"): - with m.If(self.e < self.inv.e): - m.d.sync += self.shift_down() - - return m - - def shift_down(self, inp): - """ shifts a mantissa down by one. 
exponent is increased to compensate - - accuracy is lost as a result in the mantissa however there are 3 - guard bits (the latter of which is the "sticky" bit) - """ - return [self.e.eq(inp.e + 1), - self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0)) - ] - - def shift_down_multi(self, diff): - """ shifts a mantissa down. exponent is increased to compensate - - accuracy is lost as a result in the mantissa however there are 3 - guard bits (the latter of which is the "sticky" bit) - - this code works by variable-shifting the mantissa by up to - its maximum bit-length: no point doing more (it'll still be - zero). - - the sticky bit is computed by shifting a batch of 1s by - the same amount, which will introduce zeros. it's then - inverted and used as a mask to get the LSBs of the mantissa. - those are then |'d into the sticky bit. - """ - sm = MultiShift(self.width) - mw = Const(self.m_width-1, len(diff)) - maxslen = Mux(diff > mw, mw, diff) - rs = sm.rshift(self.m[1:], maxslen) - maxsleni = mw - maxslen - m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert - - stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0] - return [self.e.eq(self.e + diff), - self.m.eq(Cat(stickybits, rs)) - ] - - def shift_up_multi(self, diff): - """ shifts a mantissa up. exponent is decreased to compensate - """ - sm = MultiShift(self.width) - mw = Const(self.m_width, len(diff)) - maxslen = Mux(diff > mw, mw, diff) - - return [self.e.eq(self.e - diff), - self.m.eq(sm.lshift(self.m, maxslen)) - ] - - -class FPNumDecode(FPNumBase): - """ Floating-point Number Class - - Contains signals for an incoming copy of the value, decoded into - sign / exponent / mantissa. - Also contains encoding functions, creation and recognition of - zero, NaN and inf (all signed) - - Four extra bits are included in the mantissa: the top bit - (m[-1]) is effectively a carry-overflow. 
The other three are - guard (m[2]), round (m[1]), and sticky (m[0]) - """ - def __init__(self, op, width, m_extra=True): - FPNumBase.__init__(self, width, m_extra) - self.op = op - - def elaborate(self, platform): - m = FPNumBase.elaborate(self, platform) - - m.d.comb += self.decode(self.v) - - return m - - def decode(self, v): - """ decodes a latched value into sign / exponent / mantissa - - bias is subtracted here, from the exponent. exponent - is extended to 10 bits so that subtract 127 is done on - a 10-bit number - """ - args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros - #print ("decode", self.e_end) - return [self.m.eq(Cat(*args)), # mantissa - self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp - self.s.eq(v[-1]), # sign - ] - -class FPNumIn(FPNumBase): - """ Floating-point Number Class - - Contains signals for an incoming copy of the value, decoded into - sign / exponent / mantissa. - Also contains encoding functions, creation and recognition of - zero, NaN and inf (all signed) - - Four extra bits are included in the mantissa: the top bit - (m[-1]) is effectively a carry-overflow. The other three are - guard (m[2]), round (m[1]), and sticky (m[0]) - """ - def __init__(self, op, width, m_extra=True): - FPNumBase.__init__(self, width, m_extra) - self.latch_in = Signal() - self.op = op - - def decode2(self, m): - """ decodes a latched value into sign / exponent / mantissa - - bias is subtracted here, from the exponent. exponent - is extended to 10 bits so that subtract 127 is done on - a 10-bit number - """ - v = self.v - args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros - #print ("decode", self.e_end) - res = ObjectProxy(m, pipemode=False) - res.m = Cat(*args) # mantissa - res.e = v[self.e_start:self.e_end] - self.P127 # exp - res.s = v[-1] # sign - return res - - def decode(self, v): - """ decodes a latched value into sign / exponent / mantissa - - bias is subtracted here, from the exponent. 
exponent - is extended to 10 bits so that subtract 127 is done on - a 10-bit number - """ - args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros - #print ("decode", self.e_end) - return [self.m.eq(Cat(*args)), # mantissa - self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp - self.s.eq(v[-1]), # sign - ] - - def shift_down(self, inp): - """ shifts a mantissa down by one. exponent is increased to compensate - - accuracy is lost as a result in the mantissa however there are 3 - guard bits (the latter of which is the "sticky" bit) - """ - return [self.e.eq(inp.e + 1), - self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0)) - ] - - def shift_down_multi(self, diff, inp=None): - """ shifts a mantissa down. exponent is increased to compensate - - accuracy is lost as a result in the mantissa however there are 3 - guard bits (the latter of which is the "sticky" bit) - - this code works by variable-shifting the mantissa by up to - its maximum bit-length: no point doing more (it'll still be - zero). - - the sticky bit is computed by shifting a batch of 1s by - the same amount, which will introduce zeros. it's then - inverted and used as a mask to get the LSBs of the mantissa. - those are then |'d into the sticky bit. - """ - if inp is None: - inp = self - sm = MultiShift(self.width) - mw = Const(self.m_width-1, len(diff)) - maxslen = Mux(diff > mw, mw, diff) - rs = sm.rshift(inp.m[1:], maxslen) - maxsleni = mw - maxslen - m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert - - #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0] - stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0] - return [self.e.eq(inp.e + diff), - self.m.eq(Cat(stickybit, rs)) - ] - - def shift_up_multi(self, diff): - """ shifts a mantissa up. 
class Trigger(Elaboratable):
    """Combines a stb/ack handshake pair into a single combinatorial pulse.

    trigger is high only while both stb and ack are high.
    """

    def __init__(self):
        self.stb = Signal(reset=0)
        self.ack = Signal()
        self.trigger = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.trigger.eq(self.stb & self.ack)
        return m

    def eq(self, inp):
        """copy stb/ack from another Trigger-like object"""
        return [self.stb.eq(inp.stb),
                self.ack.eq(inp.ack)
               ]

    def ports(self):
        return [self.stb, self.ack]


class FPOpIn(PrevControl):
    """Input-side FP operand port: PrevControl with a value alias (v).

    chain_from / chain_inv connect this port to an upstream FPOp-style
    object (v / stb / ack); they differ only in whether the returned
    ACK is inverted, so the common body is factored into _chain.
    """

    def __init__(self, width):
        PrevControl.__init__(self)
        self.width = width

    @property
    def v(self):
        # alias: the operand value is the PrevControl input data
        return self.data_i

    def _chain(self, in_op, extra, inv):
        """receive value and STB from in_op, send (optionally inverted) ACK.

        extra, when not None, is ANDed into the received STB.
        """
        stb = in_op.stb
        if extra is not None:
            stb = stb & extra
        ack = ~self.ack if inv else self.ack
        return [self.v.eq(in_op.v),   # receive value
                self.stb.eq(stb),     # receive STB
                in_op.ack.eq(ack),    # send ACK
               ]

    def chain_inv(self, in_op, extra=None):
        return self._chain(in_op, extra, inv=True)

    def chain_from(self, in_op, extra=None):
        return self._chain(in_op, extra, inv=False)


class FPOpOut(NextControl):
    """Output-side FP operand port: NextControl with a value alias (v).

    Mirror of FPOpIn; same chain_from/chain_inv API (deduplicated
    through _chain in the same way).
    """

    def __init__(self, width):
        NextControl.__init__(self)
        self.width = width

    @property
    def v(self):
        # alias: the operand value is the NextControl output data
        return self.data_o

    def _chain(self, in_op, extra, inv):
        """receive value and STB from in_op, send (optionally inverted) ACK"""
        stb = in_op.stb
        if extra is not None:
            stb = stb & extra
        ack = ~self.ack if inv else self.ack
        return [self.v.eq(in_op.v),   # receive value
                self.stb.eq(stb),     # receive STB
                in_op.ack.eq(ack),    # send ACK
               ]

    def chain_inv(self, in_op, extra=None):
        return self._chain(in_op, extra, inv=True)

    def chain_from(self, in_op, extra=None):
        return self._chain(in_op, extra, inv=False)


class Overflow:  # (Elaboratable):
    """Rounding-decision bits carried alongside a mantissa.

    guard/round_bit/sticky come from the three bits shifted out of the
    bottom of an extended mantissa (tot[2]/tot[1]/tot[0]); m0 is a copy
    of the mantissa LSB.  roundz (combinatorial, see elaborate) is the
    round-to-nearest-even decision.
    """

    def __init__(self):
        self.guard = Signal(reset_less=True)      # tot[2]
        self.round_bit = Signal(reset_less=True)  # tot[1]
        self.sticky = Signal(reset_less=True)     # tot[0]
        self.m0 = Signal(reset_less=True)         # mantissa zero bit

        self.roundz = Signal(reset_less=True)

    def __iter__(self):
        yield self.guard
        yield self.round_bit
        yield self.sticky
        yield self.m0

    def eq(self, inp):
        return [self.guard.eq(inp.guard),
                self.round_bit.eq(inp.round_bit),
                self.sticky.eq(inp.sticky),
                self.m0.eq(inp.m0)]

    def elaborate(self, platform):
        m = Module()
        # round up when guard set and any of round/sticky/LSB set
        m.d.comb += self.roundz.eq(self.guard &
                                   (self.round_bit | self.sticky | self.m0))
        return m


class FPBase:
    """ IEEE754 Floating Point Base Class

        contains common functions for FP manipulation, such as
        extracting and packing operands, normalisation, denormalisation,
        rounding etc.
    """

    def get_op(self, m, op, v, next_state):
        """ this function moves to the next state and copies the operand
            when both ready and valid are 1.
            acknowledgement is sent by setting ack to ZERO.
        """
        res = v.decode2(m)
        ack = Signal()
        with m.If((op.ready_o) & (op.valid_i_test)):
            m.next = next_state
            # op is latched in from FPNumIn class on same ack/stb
            m.d.comb += ack.eq(0)
        with m.Else():
            m.d.comb += ack.eq(1)
        return [res, ack]

    def denormalise(self, m, a):
        """ denormalises a number.  this is probably the wrong name for
            this function.  for normalised numbers (exponent != minimum)
            one *extra* bit (the implicit 1) is added *back in*.
            for denormalised numbers, the mantissa is left alone
            and the exponent increased by 1.

            both cases *effectively multiply the number stored by 2*,
            which has to be taken into account when extracting the result.
        """
        with m.If(a.exp_n127):
            m.d.sync += a.e.eq(a.N126)   # limit a exponent
        with m.Else():
            m.d.sync += a.m[-1].eq(1)    # set top mantissa bit

    def op_normalise(self, m, op, next_state):
        """ operand normalisation
            NOTE: just like "align", this one keeps going round every clock
            until the result's exponent is within acceptable "range"
        """
        with m.If((op.m[-1] == 0)):  # check last bit of mantissa
            m.d.sync += [
                op.e.eq(op.e - 1),   # DECREASE exponent
                op.m.eq(op.m << 1),  # shift mantissa UP
            ]
        with m.Else():
            m.next = next_state

    def normalise_1(self, m, z, of, next_state):
        """ first stage normalisation

            NOTE: just like "align", this one keeps going round every clock
            until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
            the extra mantissa bits coming from tot[0..2]
        """
        with m.If((z.m[-1] == 0) & (z.e > z.N126)):
            m.d.sync += [
                z.e.eq(z.e - 1),             # DECREASE exponent
                z.m.eq(z.m << 1),            # shift mantissa UP
                z.m[0].eq(of.guard),         # steal guard bit (was tot[2])
                of.guard.eq(of.round_bit),   # steal round_bit (was tot[1])
                of.round_bit.eq(0),          # reset round bit
                of.m0.eq(of.guard),
            ]
        with m.Else():
            m.next = next_state

    def normalise_2(self, m, z, of, next_state):
        """ second stage normalisation

            NOTE: just like "align", this one keeps going round every clock
            until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
            the extra mantissa bits coming from tot[0..2]
        """
        with m.If(z.e < z.N126):
            m.d.sync += [
                z.e.eq(z.e + 1),     # INCREASE exponent
                z.m.eq(z.m >> 1),    # shift mantissa DOWN
                of.guard.eq(z.m[0]),
                of.m0.eq(z.m[1]),
                of.round_bit.eq(of.guard),
                of.sticky.eq(of.sticky | of.round_bit)
            ]
        with m.Else():
            m.next = next_state

    def roundz(self, m, z, roundz):
        """ performs rounding on the output.
            TODO: different kinds of rounding
        """
        with m.If(roundz):
            m.d.sync += z.m.eq(z.m + 1)      # mantissa rounds up
            with m.If(z.m == z.m1s):         # all 1s
                m.d.sync += z.e.eq(z.e + 1)  # exponent rounds up

    def corrections(self, m, z, next_state):
        """ denormalisation and sign-bug corrections
        """
        m.next = next_state
        # denormalised, correct exponent to zero
        with m.If(z.is_denormalised):
            m.d.sync += z.e.eq(z.N127)

    def pack(self, m, z, next_state):
        """ packs the result into the output (detects overflow->Inf)
        """
        m.next = next_state
        # if overflow occurs, return inf
        with m.If(z.is_overflowed):
            m.d.sync += z.inf(z.s)
        with m.Else():
            m.d.sync += z.create(z.s, z.e, z.m)

    def put_z(self, m, z, out_z, next_state):
        """ put_z: stores the result in the output.  raises valid_o and
            waits for the downstream ready to be seen before moving to the
            next state, resetting valid_o back to zero on acknowledgement.
        """
        m.d.sync += [
            out_z.v.eq(z.v)
        ]
        with m.If(out_z.valid_o & out_z.ready_i_test):
            m.d.sync += out_z.valid_o.eq(0)
            m.next = next_state
        with m.Else():
            m.d.sync += out_z.valid_o.eq(1)


class FPState(FPBase):
    """A named FSM state; inputs/outputs are attached as attributes."""

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        self.inputs = inputs
        for k, v in inputs.items():
            setattr(self, k, v)

    def set_outputs(self, outputs):
        self.outputs = outputs
        for k, v in outputs.items():
            setattr(self, k, v)


class FPID:
    """Carries a muxid ("mid") through a pipeline stage.

    When id_wid is falsy (None or 0) no id signals are created and
    idsync is a no-op.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        # BUG FIX: previously tested "self.id_wid is not None", which is
        # True for id_wid == 0 even though __init__ (truthiness test)
        # created no signals in that case -- eq(None) would then crash.
        # Test the signals themselves so both checks always agree.
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
class FPCorrectionsMod(Elaboratable):
    """Combinatorial post-round correction stage.

    Copies the input FPRoundData through and, when the result is a
    denormalised number (and no early-out result is pending), forces
    the exponent to the denormal encoding (N127).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):       # skip when early-out is active
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m


class FPCorrections(FPState):
    """FSM wrapper around FPCorrectionsMod (state "corrections")."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # BUG FIX: FPCorrectionsMod requires id_wid; it was not passed,
        # raising TypeError on construction.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # BUG FIX: the module exposes .out_z, not .o
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
class FPSCData:
    """Special-cases/denorm inter-stage data: operands a/b, early-out
    result (z/oz/out_do_z) and the muxid (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        yield from self.a
        yield from self.b
        yield from self.z
        yield self.oz
        yield self.out_do_z
        yield self.mid

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]


class FPAddDeNormMod(FPState):
    """Combinatorial denormalisation of both operands.

    For each operand: if the exponent is at minimum (exp_n127) clamp it
    to N126, otherwise set the implicit top mantissa bit.  The early-out
    path (z/oz/out_do_z) and mid are passed straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def _denorm_one(self, m, op_o, op_i):
        """denormalise a single operand (factored: was duplicated for a/b,
        flagged in the original with an XXX about repeated code)"""
        m.d.comb += op_o.eq(op_i)
        with m.If(op_i.exp_n127):
            m.d.comb += op_o.e.eq(op_i.N126)   # limit exponent
        with m.Else():
            m.d.comb += op_o.m[-1].eq(1)       # set top mantissa bit

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            self._denorm_one(m, self.o.a, self.i.a)
            self._denorm_one(m, self.o.b, self.i.b)

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m


class FPAddDeNorm(FPState):
    """FSM wrapper around FPAddDeNormMod (state "denormalise")."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # BUG FIX: FPAddDeNormMod requires id_wid; it was not passed,
        # raising TypeError on construction.
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # BUG FIX: the module exposes its outputs as .o (an FPSCData
        # with .a/.b), not as out_a/out_b attributes.
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
class FPNumBase2Ops:
    """Pair of FPNumBase operands plus a muxid, used between stages."""

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        # copy every field across from a like-shaped record
        pairs = ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        return [self.a, self.b, self.mid]


class FPADDBaseData:
    """Raw adder input record: two width-bit operands and a muxid."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        # copy every field across from a like-shaped record
        pairs = ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        return [self.a, self.b, self.mid]


class FPGet2OpMod(PrevControl):
    """PrevControl that latches an FPADDBaseData pair combinatorially
    into .o whenever the handshake trigger fires.
    """

    def __init__(self, width, id_wid):
        PrevControl.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.data_i = self.ispec()
        self.i = self.data_i          # alias for stage-style access
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = PrevControl.elaborate(self, platform)
        with m.If(self.trigger):
            # pass the operands through on a successful handshake
            m.d.comb += self.o.eq(self.data_i)
        return m
class FPGet2Op(FPState):
    """ gets operands: FSM wrapper around FPGet2OpMod.  Latches both
        operands on the handshake trigger and advances to out_state.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def trigger_setup(self, m, in_stb, in_ack):
        """ links stb/ack
        """
        m.d.comb += self.mod.valid_i.eq(in_stb)
        m.d.comb += in_ack.eq(self.mod.ready_o)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.get_ops = self.mod
        m.d.comb += self.mod.i.eq(i)
        m.d.comb += self.out_ack.eq(self.mod.ready_o)
        m.d.comb += self.out_decode.eq(self.mod.trigger)

    def process(self, i):
        return self.o

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.mod.ready_o.eq(0),   # acknowledge: drop ready
                self.o.eq(self.mod.o),    # latch operands
            ]
        with m.Else():
            m.d.sync += self.mod.ready_o.eq(1)


class FPNormToPack(FPState, SimpleHandshake):
    """Single-handshake pipeline stage chaining normalise-1, rounding,
    corrections and packing combinatorially (StageChain).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        SimpleHandshake.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid)  # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        stages = [nmod, rmod, cmod, pmod]
        chain = StageChain(stages)
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        self.o = pmod.o

    def process(self, i):
        return self.o

    def action(self, m):
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"


class FPPackData(Object):
    """Final packed result: the IEEE754 bit-pattern z plus the muxid."""

    def __init__(self, width, id_wid):
        Object.__init__(self)
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)


class FPPackMod(Elaboratable):
    """Combinatorial packing stage: converts s/e/m into the final
    bit-pattern, detecting overflow (-> Inf), or passes through the
    early-out value oz when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_overflowed):
                m.d.comb += z.inf(self.i.z.s)     # overflow -> Inf
            with m.Else():
                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        with m.Else():
            m.d.comb += z.v.eq(self.i.oz)         # early-out result
        m.d.comb += self.o.z.eq(z.v)
        return m


class FPPack(FPState):
    """FSM wrapper around FPPackMod (state "pack")."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # BUG FIX: FPPackMod requires id_wid; it was not passed,
        # raising TypeError on construction.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # BUG FIX: FPPackData has .z (not .v), and the module exposes
        # its output as .o (not .out_z).
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"


class FPAddStage1Data:
    """Post-add data: result z, early-out (oz/out_do_z), rounding bits
    (of: Overflow) and muxid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        yield from self.z
        yield self.out_do_z
        yield self.oz
        yield from self.of
        yield self.mid

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.of.eq(i.of), self.mid.eq(i.mid)]
class FPNorm1Data:
    """Post-normalisation data: result z, the round decision (roundz),
    early-out (oz/out_do_z) and muxid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.roundz.eq(i.roundz), self.mid.eq(i.mid)]


class FPNorm1ModSingle(Elaboratable):
    """Single-cycle (combinatorial) first-stage normalisation.

    Decrease path: counts leading zeros with a PriorityEncoder (on the
    bit-reversed mantissa) and shifts the mantissa up / exponent down in
    one step, clamped so the exponent never drops below the minimum
    non-Inf/NaN value.  Increase path: shifts the mantissa down through
    MultiShiftRMerge (which merges shifted-out bits into the sticky
    position) and raises the exponent to the minimum.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),       # inverted
                    clz.eq(limclz),              # count zeros from MSB down
                    temp_s.eq(temp_m << clz),    # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),   # exclude bits 0&1
                    of.m0.eq(temp_s[2]),         # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),      # guard
                    of.round_bit.eq(temp_s[0]),  # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # BUG FIX: these four previously read temp_s, a signal
                    # that is only driven inside the (mutually-exclusive)
                    # decrease branch and therefore reads as zero here.
                    # The shifted-out bits live in the merge-shifter
                    # output msr.m, mirroring the temp_s[2..0] pattern
                    # of the decrease branch (offset by the sticky bit).
                    of.m0.eq(msr.m[3]),          # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too (merge sticky)
                    of.guard.eq(msr.m[2]),       # guard
                    of.round_bit.eq(msr.m[1]),   # round
                    of.sticky.eq(msr.m[0]),      # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
class FPNorm1ModMulti:
    """Multi-cycle first-stage normalisation: performs ONE shift per
    invocation; out_norm signals the FSM to keep looping.  in_select
    chooses between the fresh input (in_z/in_of) and the looped-back
    temporary (temp_z/temp_of).
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # BUG FIX: out_norm is driven in elaborate() and read by
        # FPNorm1Multi, but was never created here (AttributeError).
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),   # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),   # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m


class FPNorm1Single(FPState):
    """FSM wrapper around FPNorm1ModSingle (state "normalise_1")."""

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # BUG FIX: FPNorm1ModSingle requires id_wid; it was not passed,
        # raising TypeError on construction.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"


class FPNorm1Multi(FPState):
    """FSM wrapper around FPNorm1ModMulti: loops in "normalise_1" until
    out_norm deasserts, then latches the round decision and moves on.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti (above) defines no setup() method
        # as shown here, so this call would raise AttributeError -- the
        # wiring it implies (in/temp/out z+of, out_norm) needs to be
        # implemented on the module; confirm against the original repo.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0)  # sets to zero when not in normalise_1 state

    def action(self, m):
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)


class FPNormaliseModSingle:
    """Single-cycle pre-normalisation (decrease-only): counts leading
    zeros with a PriorityEncoder and shifts mantissa up / exponent down
    in one step.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # BUG FIX: in_of/out_of are read/driven in elaborate() but were
        # never created, raising AttributeError.
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # BUG FIX: this class has no .i attribute; the input is in_z.
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),        # inverted
                clz.eq(pe.o),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),     # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),  # exclude bits 0&1
            ]

        return m


class FPPutZ(FPState):
    """Stores the result in out_z, raising valid_o until the downstream
    ready is seen, then drops valid_o and moves to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z)
        ]
        with m.If(self.out_z.z.valid_o & self.out_z.z.ready_i_test):
            m.d.sync += self.out_z.z.valid_o.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.valid_o.eq(1)


class FPPutZIdx(FPState):
    """Like FPPutZ but selects the output port from an array (out_zs)
    indexed by the muxid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].valid_o),
                     outz_ack.eq(self.out_zs[self.in_mid].ready_i_test),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].valid_o.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].valid_o.eq(1)
class FPRoundData:
    """Post-round data: result z, early-out (oz/out_do_z) and muxid."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.mid.eq(i.mid)]


class FPRoundMod(Elaboratable):
    """Combinatorial rounding: when roundz is set, increments the
    mantissa, bumping the exponent if the mantissa was all-ones.
    Skipped entirely when the early-out out_do_z is active.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1)  # mantissa up
                with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1)  # exp up

        return m


class FPRound(FPState):
    """FSM wrapper around FPRoundMod (state "round")."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # BUG FIX: FPRoundMod requires id_wid; it was not passed,
        # raising TypeError on construction.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # BUG FIX: dropped the call to self.idsync(m) -- idsync is
        # defined on FPID, not on FPState/FPRound, so the call raised
        # AttributeError.  The mid is carried in out_z below instead.
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # BUG FIX: the module exposes .out_z, not .o
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"


def sqrtsimple(num):
    """Bit-by-bit (base-4) integer square root: returns floor(sqrt(num)).

    BUG FIX: the original seeded with `while bit < num: bit <<= 2`,
    which the file's own comment flagged as failing (e.g. num=65536):
    when num is itself a power of four the loop stops one place short,
    and otherwise it can overshoot the largest power of four <= num.
    Seed with `bit <= num` and step back one base-4 place instead.
    """
    res = 0
    bit = 1

    # find the largest power of four <= num
    while bit <= num:
        bit <<= 2
    bit >>= 2

    while bit != 0:
        if num >= res + bit:
            num -= res + bit
            res = (res >> 1) + bit
        else:
            res >>= 1
        bit >>= 2

    return res
# --- continuation of src/add/fsqrt.py (deleted by this commit); the
# --- sqrt() header appears at the end of the previous mashed diff line
# --- and is restored here so the unit is complete.

def sqrt(num):
    """Non-restoring integer square root.

    Returns (root, remainder) such that root*root + remainder == num.
    Iterates from bit 64 downwards, so inputs up to 130 bits are
    supported; self-checked against int(x**0.5) in the demo below.
    """
    D = num     # D is input (from num)
    Q = 0       # quotient (the root, built one bit per iteration)
    R = 0       # remainder
    for i in range(64, -1, -1):     # negative ranges are weird...

        # bring down the next two bits of D
        R = (R << 2) | ((D >> (i + i)) & 3)

        if R >= 0:
            R -= ((Q << 2) | 1)     # -Q01
        else:
            R += ((Q << 2) | 3)     # +Q11

        Q <<= 1
        if R >= 0:
            Q |= 1                  # new Q

    # final non-restoring remainder adjustment
    if R < 0:
        R = R + ((Q << 1) | 1)

    return Q, R


# grabbed these from unit_test_single (convenience, this is just experimenting)

def get_mantissa(x):
    """bottom 23 bits of an FP32 word"""
    return 0x7fffff & x

def get_exponent(x):
    """unbiased exponent of an FP32 word (bias 127)"""
    return ((x & 0x7f800000) >> 23) - 127

def set_exponent(x, e):
    """replace the exponent field of FP32 word x with (rebias of) e"""
    return (x & ~0x7f800000) | ((e + 127) << 23)

def get_sign(x):
    """sign bit of an FP32 word"""
    return ((x & 0x80000000) >> 31)

# convert s/e/m to FP32  (BUGFIX: this comment and the one on decode_fp32
# were swapped in the original)
def create_fp32(s, e, m):
    """ receive sign, exponent, mantissa, return FP32 """
    # BUGFIX: the original called set_exponent() with a single argument,
    # which is a TypeError -- the exponent was silently dropped.
    return set_exponent((s << 31) | get_mantissa(m), e)

# convert FP32 to s/e/m
def decode_fp32(x):
    """ receive FP32, return sign, exponent, mantissa """
    return get_sign(x), get_exponent(x), get_mantissa(x)


# main function, takes mantissa and exponent as separate arguments
# returns a tuple: sqrt'd mantissa, remainder, sqrt'd exponent

def main(mantissa, exponent):
    if exponent & 1 != 0:
        # odd exponent: shift mantissa up, subtract 1 from exp to compensate
        mantissa <<= 1
        exponent -= 1
    m, r = sqrt(mantissa)
    return m, r, exponent >> 1


# normalisation function: crude round-to-nearest on the guard bits
def normalise(s, m, e, lowbits):
    if (lowbits >= 2):
        m += 1
    # NOTE(review): get_mantissa() masks to 23 bits (max 0x7fffff), so it
    # can never equal (1<<24)-1 -- this exponent bump is dead code as
    # written; confirm the intended overflow condition before "fixing".
    if get_mantissa(m) == ((1 << 24) - 1):
        e += 1
    return s, m, e


def fsqrt_test(x):
    """compare our integer sqrt against sfpy Float32.sqrt (x: Float32)"""

    xbits = x.bits
    print ("x", x, type(x))
    sq_test = x.sqrt()
    print ("sqrt", sq_test)

    print (xbits, type(xbits))
    s, e, m = decode_fp32(xbits)
    print("x decode", s, e, m, hex(m))

    m |= 1 << 23    # set top bit (the missing "1" from mantissa)
    m <<= 27

    sm, sr, se = main(m, e)
    lowbits = sm & 0x3
    sm >>= 2
    sm = get_mantissa(sm)
    #sm += 2

    s, sm, se = normalise(s, sm, se, lowbits)

    print("our sqrt", s, se, sm, hex(sm), bin(sm), "lowbits", lowbits,
          "rem", hex(sr))
    if lowbits >= 2:
        print ("probably needs rounding (+1 on mantissa)")

    sq_xbits = sq_test.bits
    s, e, m = decode_fp32(sq_xbits)
    print ("sf32 sqrt", s, e, m, hex(m), bin(m))
    print ()


if __name__ == '__main__':

    # sfpy is only needed for the interactive demo: import it lazily so
    # the pure-python helpers above are usable without the dependency.
    from sfpy import Float32

    # quick test up to 1e4 of the two integer sqrt functions
    # (sqrtsimple is defined earlier in this file)
    for Q in range(1, int(1e4)):
        print(Q, sqrt(Q), sqrtsimple(Q), int(Q**0.5))
        assert int(Q**0.5) == sqrtsimple(Q), "Q sqrtsimpl fail %d" % Q
        assert int(Q**0.5) == sqrt(Q)[0], "Q sqrt fail %d" % Q

    # quick mantissa/exponent demo
    for e in range(26):
        for m in range(26):
            ms, mr, es = main(m, e)
            print("m:%d e:%d sqrt: m:%d-%d e:%d" % (m, e, ms, mr, es))

    for v in (1234.123456789, 32.1, 16.0, 8.0, 8.5,
              3.14159265358979323, 12.99392923123123, 0.123456):
        fsqrt_test(Float32(v))


"""
Notes:
https://pdfs.semanticscholar.org/5060/4e9aff0e37089c4ab9a376c3f35761ffe28b.pdf

//This is the main code of integer sqrt function found here:
//http://verilogcodes.blogspot.com/2017/11/a-verilog-function-for-finding-square-root.html

module testbench;

reg [15:0] sqr;

//Verilog function to find square root of a 32 bit number.
//The output is 16 bit.
function [15:0] sqrt;
    input [31:0] num;  //declare input
    //intermediate signals.
    reg [31:0] a;
    reg [15:0] q;
    reg [17:0] left,right,r;
    integer i;
begin
    //initialize all the variables.
    a = num;
    q = 0;
    i = 0;
    left = 0;   //input to adder/sub
    right = 0;  //input to adder/sub
    r = 0;      //remainder
    //run the calculations for 16 iterations.
    for(i=0;i<16;i=i+1) begin
        right = {q,r[17],1'b1};
        left = {r[15:0],a[31:30]};
        a = {a[29:0],2'b00};    //left shift by 2 bits.
        if (r[17] == 1)         //add if r is negative
            r = left + right;
        else                    //subtract if r is positive
            r = left - right;
        q = {q[14:0],!r[17]};
    end
    sqrt = q;   //final assignment of output.
"""
# (the remainder of the original Notes block -- Verilog end, C and
#  Wikipedia reference versions -- continues on the next diff line)
-end -endfunction //end of Function - - -c version (from paper linked from URL) - -unsigned squart(D, r) /*Non-Restoring sqrt*/ - unsigned D; /*D:32-bit unsigned integer to be square rooted */ - int *r; -{ - unsigned Q = 0; /*Q:16-bit unsigned integer (root)*/ - int R = 0; /*R:17-bit integer (remainder)*/ - int i; - for (i = 15;i>=0;i--) /*for each root bit*/ - { - if (R>=0) - { /*new remainder:*/ - R = R<<2)|((D>>(i+i))&3); - R = R-((Q<<2)|1); /*-Q01*/ - } - else - { /*new remainder:*/ - R = R<<2)|((D>>(i+i))&3); - R = R+((Q<<2)|3); /*+Q11*/ - } - if (R>=0) Q = Q<<1)|1; /*new Q:*/ - else Q = Q<<1)|0; /*new Q:*/ - } - - /*remainder adjusting*/ - if (R<0) R = R+((Q<<1)|1); - *r = R; /*return remainder*/ - return(Q); /*return root*/ -} - -From wikipedia page: - -short isqrt(short num) { - short res = 0; - short bit = 1 << 14; // The second-to-top bit is set: 1 << 30 for 32 bits - - // "bit" starts at the highest power of four <= the argument. - while (bit > num) - bit >>= 2; - - while (bit != 0) { - if (num >= res + bit) { - num -= res + bit; - res = (res >> 1) + bit; - } - else - res >>= 1; - bit >>= 2; - } - return res; -} - -""" diff --git a/src/add/function_unit.py b/src/add/function_unit.py deleted file mode 100644 index 108c84f3..00000000 --- a/src/add/function_unit.py +++ /dev/null @@ -1,44 +0,0 @@ -from nmigen import Signal, Cat, Const, Mux, Module, Array -from nmigen.cli import main, verilog - -from nmigen_add_experiment import FPADD -from rstation_row import ReservationStationRow - -from math import log - -class FunctionUnit: - - def __init__(self, width, num_units): - """ Function Unit - - * width: bit-width of IEEE754. 
class FunctionUnit:
    # NOTE(review): reconstruction of deleted src/add/function_unit.py;
    # this mashed diff line starts mid-docstring, so the class header
    # (which sits at the end of the previous line) is restored here.

    def __init__(self, width, num_units):
        """ Function Unit

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * num_units: number of Reservation Stations
        """
        self.width = width

        fus = []
        bsz = int(log(width) / log(2))  # bits needed to number each RS
        for i in range(num_units):
            mid = Const(i, bsz)         # reservation-station (mux) id
            rs = ReservationStationRow(width, mid)
            rs.name = "RS%d" % i
            fus.append(rs)
        self.fus = Array(fus)

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FunctionUnit
            (original docstring said ReservationStationRow: copy-paste slip)
        """
        m = Module()

        return m


if __name__ == "__main__":
    # BUGFIX: both calls below were missing their closing parenthesis
    # (SyntaxError), and main() was passed the undefined name "alu";
    # the ReservationStationRow instance is what was constructed.
    rs = ReservationStationRow(width=32, id_wid=Const(1, 4))
    main(rs, ports=[rs.in_a, rs.in_b, rs.out_z])

    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #        ports=alu.in_a.ports() + \
    #              alu.in_b.ports() + \
    #              alu.out_z.ports())

# --- src/add/inputgroup.py (also deleted by this commit) ---------------

from nmigen import Module, Signal, Cat, Array, Const
from nmigen.lib.coding import PriorityEncoder
from math import log

from fpbase import Trigger


class FPGetSyncOpsMod:
    """ synchroniser for a group of operands: the outputs follow the
        inputs (combinatorially) only while ack is asserted and every
        stb bit is set.
    """
    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        inops = []
        outops = []
        for i in range(num_ops):
            inops.append(Signal(width, reset_less=True))
            outops.append(Signal(width, reset_less=True))
        self.in_op = inops      # one input Signal per operand
        self.out_op = outops    # one output Signal per operand
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # ready only when every stb bit is set (all-ones compare)
        m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
        m.d.comb += self.out_decode.eq(self.ack & self.ready)
        with m.If(self.out_decode):
            for i in range(self.num_ops):
                m.d.comb += [
                    self.out_op[i].eq(self.in_op[i]),
                ]
        return m

    def ports(self):
        return self.in_op + self.out_op + [self.stb, self.ack]


class FPOps(Trigger):
    def __init__(self, width, num_ops):
        Trigger.__init__(self)
self.width = width - self.num_ops = num_ops - - res = [] - for i in range(num_ops): - res.append(Signal(width)) - self.v = Array(res) - - def ports(self): - res = [] - for i in range(self.num_ops): - res.append(self.v[i]) - res.append(self.ack) - res.append(self.stb) - return res - - -class InputGroup: - def __init__(self, width, num_ops=2, num_rows=4): - self.width = width - self.num_ops = num_ops - self.num_rows = num_rows - self.mmax = int(log(self.num_rows) / log(2)) - self.rs = [] - self.mid = Signal(self.mmax, reset_less=True) # multiplex id - for i in range(num_rows): - self.rs.append(FPGetSyncOpsMod(width, num_ops)) - self.rs = Array(self.rs) - - self.out_op = FPOps(width, num_ops) - - def elaborate(self, platform): - m = Module() - - pe = PriorityEncoder(self.num_rows) - m.submodules.selector = pe - m.submodules.out_op = self.out_op - m.submodules += self.rs - - # connect priority encoder - in_ready = [] - for i in range(self.num_rows): - in_ready.append(self.rs[i].ready) - m.d.comb += pe.i.eq(Cat(*in_ready)) - - active = Signal(reset_less=True) - out_en = Signal(reset_less=True) - m.d.comb += active.eq(~pe.n) # encoder active - m.d.comb += out_en.eq(active & self.out_op.trigger) - - # encoder active: ack relevant input, record MID, pass output - with m.If(out_en): - rs = self.rs[pe.o] - m.d.sync += self.mid.eq(pe.o) - m.d.sync += rs.ack.eq(0) - m.d.sync += self.out_op.stb.eq(0) - for j in range(self.num_ops): - m.d.sync += self.out_op.v[j].eq(rs.out_op[j]) - with m.Else(): - m.d.sync += self.out_op.stb.eq(1) - # acks all default to zero - for i in range(self.num_rows): - m.d.sync += self.rs[i].ack.eq(1) - - return m - - def ports(self): - res = [] - for i in range(self.num_rows): - inop = self.rs[i] - res += inop.in_op + [inop.stb] - return self.out_op.ports() + res + [self.mid] - - diff --git a/src/add/iocontrol.py b/src/add/iocontrol.py deleted file mode 100644 index 3d823c9b..00000000 --- a/src/add/iocontrol.py +++ /dev/null @@ -1,306 +0,0 @@ -""" IO 
Control API - - Associated development bugs: - * http://bugs.libre-riscv.org/show_bug.cgi?id=64 - * http://bugs.libre-riscv.org/show_bug.cgi?id=57 - - Stage API: - --------- - - stage requires compliance with a strict API that may be - implemented in several means, including as a static class. - - Stages do not HOLD data, and they definitely do not contain - signalling (ready/valid). They do however specify the FORMAT - of the incoming and outgoing data, and they provide a means to - PROCESS that data (from incoming format to outgoing format). - - Stage Blocks really must be combinatorial blocks. It would be ok - to have input come in from sync'd sources (clock-driven) however by - doing so they would no longer be deterministic, and chaining such - blocks with such side-effects together could result in unexpected, - unpredictable, unreproduceable behaviour. - So generally to be avoided, then unless you know what you are doing. - - the methods of a stage instance must be as follows: - - * ispec() - Input data format specification. Takes a bit of explaining. - The requirements are: something that eventually derives from - nmigen Value must be returned *OR* an iterator or iterable - or sequence (list, tuple etc.) or generator must *yield* - thing(s) that (eventually) derive from the nmigen Value class. - - Complex to state, very simple in practice: - see test_buf_pipe.py for over 25 worked examples. - - * ospec() - Output data format specification. - format requirements identical to ispec. - - * process(m, i) - Optional function for processing ispec-formatted data. - returns a combinatorial block of a result that - may be assigned to the output, by way of the "nmoperator.eq" - function. Note that what is returned here can be - extremely flexible. Even a dictionary can be returned - as long as it has fields that match precisely with the - Record into which its values is intended to be assigned. - Again: see example unit tests for details. 
- - * setup(m, i) - Optional function for setting up submodules. - may be used for more complex stages, to link - the input (i) to submodules. must take responsibility - for adding those submodules to the module (m). - the submodules must be combinatorial blocks and - must have their inputs and output linked combinatorially. - - Both StageCls (for use with non-static classes) and Stage (for use - by static classes) are abstract classes from which, for convenience - and as a courtesy to other developers, anything conforming to the - Stage API may *choose* to derive. See Liskov Substitution Principle: - https://en.wikipedia.org/wiki/Liskov_substitution_principle - - StageChain: - ---------- - - A useful combinatorial wrapper around stages that chains them together - and then presents a Stage-API-conformant interface. By presenting - the same API as the stages it wraps, it can clearly be used recursively. - - ControlBase: - ----------- - - The base class for pipelines. Contains previous and next ready/valid/data. - Also has an extremely useful "connect" function that can be used to - connect a chain of pipelines and present the exact same prev/next - ready/valid/data API. - - Note: pipelines basically do not become pipelines as such until - handed to a derivative of ControlBase. ControlBase itself is *not* - strictly considered a pipeline class. Wishbone and AXI4 (master or - slave) could be derived from ControlBase, for example. 
-""" - -from nmigen import Signal, Cat, Const, Module, Value, Elaboratable -from nmigen.cli import verilog, rtlil -from nmigen.hdl.rec import Record - -from collections.abc import Sequence, Iterable -from collections import OrderedDict - -import nmoperator - - -class Object: - def __init__(self): - self.fields = OrderedDict() - - def __setattr__(self, k, v): - print ("kv", k, v) - if (k.startswith('_') or k in ["fields", "name", "src_loc"] or - k in dir(Object) or "fields" not in self.__dict__): - return object.__setattr__(self, k, v) - self.fields[k] = v - - def __getattr__(self, k): - if k in self.__dict__: - return object.__getattr__(self, k) - try: - return self.fields[k] - except KeyError as e: - raise AttributeError(e) - - def __iter__(self): - for x in self.fields.values(): # OrderedDict so order is preserved - if isinstance(x, Iterable): - yield from x - else: - yield x - - def eq(self, inp): - res = [] - for (k, o) in self.fields.items(): - i = getattr(inp, k) - print ("eq", o, i) - rres = o.eq(i) - if isinstance(rres, Sequence): - res += rres - else: - res.append(rres) - print (res) - return res - - def ports(self): # being called "keys" would be much better - return list(self) - - -class RecordObject(Record): - def __init__(self, layout=None, name=None): - Record.__init__(self, layout=layout or [], name=None) - - def __setattr__(self, k, v): - #print (dir(Record)) - if (k.startswith('_') or k in ["fields", "name", "src_loc"] or - k in dir(Record) or "fields" not in self.__dict__): - return object.__setattr__(self, k, v) - self.fields[k] = v - #print ("RecordObject setattr", k, v) - if isinstance(v, Record): - newlayout = {k: (k, v.layout)} - elif isinstance(v, Value): - newlayout = {k: (k, v.shape())} - else: - newlayout = {k: (k, nmoperator.shape(v))} - self.layout.fields.update(newlayout) - - def __iter__(self): - for x in self.fields.values(): # remember: fields is an OrderedDict - if isinstance(x, Iterable): - yield from x # a bit like flatten 
(nmigen.tools) - else: - yield x - - def ports(self): # would be better being called "keys" - return list(self) - - -class PrevControl(Elaboratable): - """ contains signals that come *from* the previous stage (both in and out) - * valid_i: previous stage indicating all incoming data is valid. - may be a multi-bit signal, where all bits are required - to be asserted to indicate "valid". - * ready_o: output to next stage indicating readiness to accept data - * data_i : an input - MUST be added by the USER of this class - """ - - def __init__(self, i_width=1, stage_ctl=False): - self.stage_ctl = stage_ctl - self.valid_i = Signal(i_width, name="p_valid_i") # prev >>in self - self._ready_o = Signal(name="p_ready_o") # prev < 1: - # multi-bit case: valid only when valid_i is all 1s - all1s = Const(-1, (len(self.valid_i), False)) - valid_i = (self.valid_i == all1s) - else: - # single-bit valid_i case - valid_i = self.valid_i - - # when stage indicates not ready, incoming data - # must "appear" to be not ready too - if self.stage_ctl: - valid_i = valid_i & self.s_ready_o - - return valid_i - - def elaborate(self, platform): - m = Module() - m.d.comb += self.trigger.eq(self.valid_i_test & self.ready_o) - return m - - def eq(self, i): - return [nmoperator.eq(self.data_i, i.data_i), - self.ready_o.eq(i.ready_o), - self.valid_i.eq(i.valid_i)] - - def __iter__(self): - yield self.valid_i - yield self.ready_o - if hasattr(self.data_i, "ports"): - yield from self.data_i.ports() - elif isinstance(self.data_i, Sequence): - yield from self.data_i - else: - yield self.data_i - - def ports(self): - return list(self) - - -class NextControl(Elaboratable): - """ contains the signals that go *to* the next stage (both in and out) - * valid_o: output indicating to next stage that data is valid - * ready_i: input from next stage indicating that it can accept data - * data_o : an output - MUST be added by the USER of this class - """ - def __init__(self, stage_ctl=False): - self.stage_ctl = 
stage_ctl - self.valid_o = Signal(name="n_valid_o") # self out>> next - self.ready_i = Signal(name="n_ready_i") # self < 1: - r_data = Array(r_data) - p_valid_i = Array(p_valid_i) - n_ready_in = Array(n_ready_in) - data_valid = Array(data_valid) - - nirn = Signal(reset_less=True) - m.d.comb += nirn.eq(~self.n.ready_i) - mid = self.p_mux.m_id - for i in range(p_len): - m.d.comb += data_valid[i].eq(0) - m.d.comb += n_ready_in[i].eq(1) - m.d.comb += p_valid_i[i].eq(0) - m.d.comb += self.p[i].ready_o.eq(0) - m.d.comb += p_valid_i[mid].eq(self.p_mux.active) - m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i) - m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid]) - anyvalid = Signal(i, reset_less=True) - av = [] - for i in range(p_len): - av.append(data_valid[i]) - anyvalid = Cat(*av) - m.d.comb += self.n.valid_o.eq(anyvalid.bool()) - m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \ - (n_ready_in[mid] & data_valid[mid])) - - for i in range(p_len): - vr = Signal(reset_less=True) - m.d.comb += vr.eq(self.p[i].valid_i & self.p[i].ready_o) - with m.If(vr): - m.d.comb += eq(r_data[i], self.p[i].data_i) - - m.d.comb += eq(self.n.data_o, self.process(r_data[mid])) - - return m - - -class CombMuxOutPipe(CombMultiOutPipeline): - def __init__(self, stage, n_len): - # HACK: stage is also the n-way multiplexer - CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage) - - # HACK: n-mux is also the stage... 
so set the muxid equal to input mid - stage.m_id = self.p.data_i.mid - - - -class InputPriorityArbiter(Elaboratable): - """ arbitration module for Input-Mux pipe, baed on PriorityEncoder - """ - def __init__(self, pipe, num_rows): - self.pipe = pipe - self.num_rows = num_rows - self.mmax = int(log(self.num_rows) / log(2)) - self.m_id = Signal(self.mmax, reset_less=True) # multiplex id - self.active = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - - assert len(self.pipe.p) == self.num_rows, \ - "must declare input to be same size" - pe = PriorityEncoder(self.num_rows) - m.submodules.selector = pe - - # connect priority encoder - in_ready = [] - for i in range(self.num_rows): - p_valid_i = Signal(reset_less=True) - m.d.comb += p_valid_i.eq(self.pipe.p[i].valid_i_test) - in_ready.append(p_valid_i) - m.d.comb += pe.i.eq(Cat(*in_ready)) # array of input "valids" - m.d.comb += self.active.eq(~pe.n) # encoder active (one input valid) - m.d.comb += self.m_id.eq(pe.o) # output one active input - - return m - - def ports(self): - return [self.m_id, self.active] - - - -class PriorityCombMuxInPipe(CombMultiInPipeline): - """ an example of how to use the combinatorial pipeline. 
- """ - - def __init__(self, stage, p_len=2): - p_mux = InputPriorityArbiter(self, p_len) - CombMultiInPipeline.__init__(self, stage, p_len, p_mux) - - -if __name__ == '__main__': - - dut = PriorityCombMuxInPipe(ExampleStage) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_combpipe.il", "w") as f: - f.write(vl) diff --git a/src/add/nmigen_add_experiment.py b/src/add/nmigen_add_experiment.py deleted file mode 100644 index ecb1d35b..00000000 --- a/src/add/nmigen_add_experiment.py +++ /dev/null @@ -1,28 +0,0 @@ -# IEEE Floating Point Adder (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen.cli import main, verilog -from fpadd.statemachine import FPADDBase, FPADD -from fpadd.pipeline import FPADDMuxInOut - -if __name__ == "__main__": - if True: - alu = FPADD(width=32, id_wid=5, single_cycle=True) - main(alu, ports=alu.rs[0][0].ports() + \ - alu.rs[0][1].ports() + \ - alu.res[0].ports() + \ - [alu.ids.in_mid, alu.ids.out_mid]) - else: - alu = FPADDBase(width=32, id_wid=5, single_cycle=True) - main(alu, ports=[alu.in_a, alu.in_b] + \ - alu.in_t.ports() + \ - alu.out_z.ports() + \ - [alu.in_mid, alu.out_mid]) - - - # works... 
but don't use, just do "python fname.py convert -t v" - #print (verilog.convert(alu, ports=[ - # ports=alu.in_a.ports() + \ - # alu.in_b.ports() + \ - # alu.out_z.ports()) diff --git a/src/add/nmigen_div_experiment.py b/src/add/nmigen_div_experiment.py deleted file mode 100644 index a7e215cb..00000000 --- a/src/add/nmigen_div_experiment.py +++ /dev/null @@ -1,246 +0,0 @@ -# IEEE Floating Point Divider (Single Precision) -# Copyright (C) Jonathan P Dawson 2013 -# 2013-12-12 - -from nmigen import Module, Signal, Const, Cat -from nmigen.cli import main, verilog - -from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, Overflow, FPBase, FPState -from singlepipe import eq - -class Div: - def __init__(self, width): - self.width = width - self.quot = Signal(width) # quotient - self.dor = Signal(width) # divisor - self.dend = Signal(width) # dividend - self.rem = Signal(width) # remainder - self.count = Signal(7) # loop count - - self.czero = Const(0, width) - - def reset(self, m): - m.d.sync += [ - self.quot.eq(self.czero), - self.rem.eq(self.czero), - self.count.eq(Const(0, 7)) - ] - - -class FPDIV(FPBase): - - def __init__(self, width): - FPBase.__init__(self) - self.width = width - - self.in_a = FPOpIn(width) - self.in_b = FPOpIn(width) - self.out_z = FPOpOut(width) - - self.states = [] - - def add_state(self, state): - self.states.append(state) - return state - - def elaborate(self, platform=None): - """ creates the HDL code-fragment for FPDiv - """ - m = Module() - - # Latches - a = FPNumIn(None, self.width, False) - b = FPNumIn(None, self.width, False) - z = FPNumOut(self.width, False) - - div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky - - of = Overflow() - m.submodules.in_a = a - m.submodules.in_b = b - m.submodules.z = z - m.submodules.of = of - - m.d.comb += a.v.eq(self.in_a.v) - m.d.comb += b.v.eq(self.in_b.v) - - with m.FSM() as fsm: - - # ****** - # gets operand a - - with m.State("get_a"): - res = self.get_op(m, self.in_a, a, "get_b") 
- m.d.sync += eq([a, self.in_a.ready_o], res) - - # ****** - # gets operand b - - with m.State("get_b"): - res = self.get_op(m, self.in_b, b, "special_cases") - m.d.sync += eq([b, self.in_b.ready_o], res) - - # ****** - # special cases: NaNs, infs, zeros, denormalised - # NOTE: some of these are unique to div. see "Special Operations" - # https://steve.hollasch.net/cgindex/coding/ieeefloat.html - - with m.State("special_cases"): - - # if a is NaN or b is NaN return NaN - with m.If(a.is_nan | b.is_nan): - m.next = "put_z" - m.d.sync += z.nan(1) - - # if a is Inf and b is Inf return NaN - with m.Elif(a.is_inf & b.is_inf): - m.next = "put_z" - m.d.sync += z.nan(1) - - # if a is inf return inf (or NaN if b is zero) - with m.Elif(a.is_inf): - m.next = "put_z" - m.d.sync += z.inf(a.s ^ b.s) - - # if b is inf return zero - with m.Elif(b.is_inf): - m.next = "put_z" - m.d.sync += z.zero(a.s ^ b.s) - - # if a is zero return zero (or NaN if b is zero) - with m.Elif(a.is_zero): - m.next = "put_z" - # if b is zero return NaN - with m.If(b.is_zero): - m.d.sync += z.nan(1) - with m.Else(): - m.d.sync += z.zero(a.s ^ b.s) - - # if b is zero return Inf - with m.Elif(b.is_zero): - m.next = "put_z" - m.d.sync += z.inf(a.s ^ b.s) - - # Denormalised Number checks - with m.Else(): - m.next = "normalise_a" - self.denormalise(m, a) - self.denormalise(m, b) - - # ****** - # normalise_a - - with m.State("normalise_a"): - self.op_normalise(m, a, "normalise_b") - - # ****** - # normalise_b - - with m.State("normalise_b"): - self.op_normalise(m, b, "divide_0") - - # ****** - # First stage of divide. initialise state - - with m.State("divide_0"): - m.next = "divide_1" - m.d.sync += [ - z.s.eq(a.s ^ b.s), # sign - z.e.eq(a.e - b.e), # exponent - div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky - div.dor.eq(b.m), - ] - div.reset(m) - - # ****** - # Second stage of divide. 
- - with m.State("divide_1"): - m.next = "divide_2" - m.d.sync += [ - div.quot.eq(div.quot << 1), - div.rem.eq(Cat(div.dend[-1], div.rem[0:])), - div.dend.eq(div.dend << 1), - ] - - # ****** - # Third stage of divide. - # This stage ends by jumping out to divide_3 - # However it defaults to jumping to divide_1 (which comes back here) - - with m.State("divide_2"): - with m.If(div.rem >= div.dor): - m.d.sync += [ - div.quot[0].eq(1), - div.rem.eq(div.rem - div.dor), - ] - with m.If(div.count == div.width-2): - m.next = "divide_3" - with m.Else(): - m.next = "divide_1" - m.d.sync += [ - div.count.eq(div.count + 1), - ] - - # ****** - # Fourth stage of divide. - - with m.State("divide_3"): - m.next = "normalise_1" - m.d.sync += [ - z.m.eq(div.quot[3:]), - of.guard.eq(div.quot[2]), - of.round_bit.eq(div.quot[1]), - of.sticky.eq(div.quot[0] | (div.rem != 0)) - ] - - # ****** - # First stage of normalisation. - - with m.State("normalise_1"): - self.normalise_1(m, z, of, "normalise_2") - - # ****** - # Second stage of normalisation. - - with m.State("normalise_2"): - self.normalise_2(m, z, of, "round") - - # ****** - # rounding stage - - with m.State("round"): - self.roundz(m, z, of.roundz) - m.next = "corrections" - - # ****** - # correction stage - - with m.State("corrections"): - self.corrections(m, z, "pack") - - # ****** - # pack stage - - with m.State("pack"): - self.pack(m, z, "put_z") - - # ****** - # put_z stage - - with m.State("put_z"): - self.put_z(m, z, self.out_z, "get_a") - - return m - - -if __name__ == "__main__": - alu = FPDIV(width=32) - main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) - - - # works... 
but don't use, just do "python fname.py convert -t v" - #print (verilog.convert(alu, ports=[ - # ports=alu.in_a.ports() + \ - # alu.in_b.ports() + \ - # alu.out_z.ports()) diff --git a/src/add/nmoperator.py b/src/add/nmoperator.py deleted file mode 100644 index bd5e5544..00000000 --- a/src/add/nmoperator.py +++ /dev/null @@ -1,171 +0,0 @@ -""" nmigen operator functions / utils - - eq: - -- - - a strategically very important function that is identical in function - to nmigen's Signal.eq function, except it may take objects, or a list - of objects, or a tuple of objects, and where objects may also be - Records. -""" - -from nmigen import Signal, Cat, Const, Mux, Module, Value, Elaboratable -from nmigen.cli import verilog, rtlil -from nmigen.lib.fifo import SyncFIFO, SyncFIFOBuffered -from nmigen.hdl.ast import ArrayProxy -from nmigen.hdl.rec import Record, Layout - -from abc import ABCMeta, abstractmethod -from collections.abc import Sequence, Iterable -from collections import OrderedDict -from queue import Queue -import inspect - - -class Visitor2: - """ a helper class for iterating twin-argument compound data structures. - - Record is a special (unusual, recursive) case, where the input may be - specified as a dictionary (which may contain further dictionaries, - recursively), where the field names of the dictionary must match - the Record's field spec. Alternatively, an object with the same - member names as the Record may be assigned: it does not have to - *be* a Record. - - ArrayProxy is also special-cased, it's a bit messy: whilst ArrayProxy - has an eq function, the object being assigned to it (e.g. a python - object) might not. despite the *input* having an eq function, - that doesn't help us, because it's the *ArrayProxy* that's being - assigned to. so.... we cheat. use the ports() function of the - python object, enumerate them, find out the list of Signals that way, - and assign them. 
- """ - def iterator2(self, o, i): - if isinstance(o, dict): - yield from self.dict_iter2(o, i) - - if not isinstance(o, Sequence): - o, i = [o], [i] - for (ao, ai) in zip(o, i): - #print ("visit", fn, ao, ai) - if isinstance(ao, Record): - yield from self.record_iter2(ao, ai) - elif isinstance(ao, ArrayProxy) and not isinstance(ai, Value): - yield from self.arrayproxy_iter2(ao, ai) - else: - yield (ao, ai) - - def dict_iter2(self, o, i): - for (k, v) in o.items(): - print ("d-iter", v, i[k]) - yield (v, i[k]) - return res - - def _not_quite_working_with_all_unit_tests_record_iter2(self, ao, ai): - print ("record_iter2", ao, ai, type(ao), type(ai)) - if isinstance(ai, Value): - if isinstance(ao, Sequence): - ao, ai = [ao], [ai] - for o, i in zip(ao, ai): - yield (o, i) - return - for idx, (field_name, field_shape, _) in enumerate(ao.layout): - if isinstance(field_shape, Layout): - val = ai.fields - else: - val = ai - if hasattr(val, field_name): # check for attribute - val = getattr(val, field_name) - else: - val = val[field_name] # dictionary-style specification - yield from self.iterator2(ao.fields[field_name], val) - - def record_iter2(self, ao, ai): - for idx, (field_name, field_shape, _) in enumerate(ao.layout): - if isinstance(field_shape, Layout): - val = ai.fields - else: - val = ai - if hasattr(val, field_name): # check for attribute - val = getattr(val, field_name) - else: - val = val[field_name] # dictionary-style specification - yield from self.iterator2(ao.fields[field_name], val) - - def arrayproxy_iter2(self, ao, ai): - for p in ai.ports(): - op = getattr(ao, p.name) - print ("arrayproxy - p", p, p.name) - yield from self.iterator2(op, p) - - -class Visitor: - """ a helper class for iterating single-argument compound data structures. - similar to Visitor2. 
- """ - def iterate(self, i): - """ iterate a compound structure recursively using yield - """ - if not isinstance(i, Sequence): - i = [i] - for ai in i: - #print ("iterate", ai) - if isinstance(ai, Record): - #print ("record", list(ai.layout)) - yield from self.record_iter(ai) - elif isinstance(ai, ArrayProxy) and not isinstance(ai, Value): - yield from self.array_iter(ai) - else: - yield ai - - def record_iter(self, ai): - for idx, (field_name, field_shape, _) in enumerate(ai.layout): - if isinstance(field_shape, Layout): - val = ai.fields - else: - val = ai - if hasattr(val, field_name): # check for attribute - val = getattr(val, field_name) - else: - val = val[field_name] # dictionary-style specification - #print ("recidx", idx, field_name, field_shape, val) - yield from self.iterate(val) - - def array_iter(self, ai): - for p in ai.ports(): - yield from self.iterate(p) - - -def eq(o, i): - """ makes signals equal: a helper routine which identifies if it is being - passed a list (or tuple) of objects, or signals, or Records, and calls - the objects' eq function. - """ - res = [] - for (ao, ai) in Visitor2().iterator2(o, i): - rres = ao.eq(ai) - if not isinstance(rres, Sequence): - rres = [rres] - res += rres - return res - - -def shape(i): - #print ("shape", i) - r = 0 - for part in list(i): - #print ("shape?", part) - s, _ = part.shape() - r += s - return r, False - - -def cat(i): - """ flattens a compound structure recursively using Cat - """ - from nmigen.tools import flatten - #res = list(flatten(i)) # works (as of nmigen commit f22106e5) HOWEVER... - res = list(Visitor().iterate(i)) # needed because input may be a sequence - return Cat(*res) - - diff --git a/src/add/pipeline.py b/src/add/pipeline.py deleted file mode 100644 index afcee743..00000000 --- a/src/add/pipeline.py +++ /dev/null @@ -1,394 +0,0 @@ -""" Example 5: Making use of PyRTL and Introspection. 
""" - -from collections.abc import Sequence - -from nmigen import Signal -from nmigen.hdl.rec import Record -from nmigen import tracer -from nmigen.compat.fhdl.bitcontainer import value_bits_sign -from contextlib import contextmanager - -from nmoperator import eq -from singlepipe import StageCls, ControlBase, BufferedHandshake -from singlepipe import UnbufferedPipeline - - -# The following example shows how pyrtl can be used to make some interesting -# hardware structures using python introspection. In particular, this example -# makes a N-stage pipeline structure. Any specific pipeline is then a derived -# class of SimplePipeline where methods with names starting with "stage" are -# stages, and new members with names not starting with "_" are to be registered -# for the next stage. - -def like(value, rname, pipe, pipemode=False): - if isinstance(value, ObjectProxy): - return ObjectProxy.like(pipe, value, pipemode=pipemode, - name=rname, reset_less=True) - else: - return Signal(value_bits_sign(value), name=rname, - reset_less=True) - return Signal.like(value, name=rname, reset_less=True) - -def get_assigns(_assigns): - assigns = [] - for e in _assigns: - if isinstance(e, ObjectProxy): - assigns += get_assigns(e._assigns) - else: - assigns.append(e) - return assigns - - -def get_eqs(_eqs): - eqs = [] - for e in _eqs: - if isinstance(e, ObjectProxy): - eqs += get_eqs(e._eqs) - else: - eqs.append(e) - return eqs - - -class ObjectProxy: - def __init__(self, m, name=None, pipemode=False, syncmode=True): - self._m = m - if name is None: - name = tracer.get_var_name(default=None) - self.name = name - self._pipemode = pipemode - self._syncmode = syncmode - self._eqs = {} - self._assigns = [] - self._preg_map = {} - - @classmethod - def like(cls, m, value, pipemode=False, name=None, src_loc_at=0, **kwargs): - name = name or tracer.get_var_name(depth=2 + src_loc_at, - default="$like") - - src_loc_at_1 = 1 + src_loc_at - r = ObjectProxy(m, value.name, pipemode) - #for a, 
aname in value._preg_map.items(): - # r._preg_map[aname] = like(a, aname, m, pipemode) - for a in value.ports(): - aname = a.name - r._preg_map[aname] = like(a, aname, m, pipemode) - return r - - def __repr__(self): - subobjs = [] - for a in self.ports(): - aname = a.name - ai = self._preg_map[aname] - subobjs.append(repr(ai)) - return "" % subobjs - - def get_specs(self, liked=False): - res = [] - for k, v in self._preg_map.items(): - #v = like(v, k, stage._m) - res.append(v) - if isinstance(v, ObjectProxy): - res += v.get_specs() - return res - - def eq(self, i): - print ("ObjectProxy eq", self, i) - res = [] - for a in self.ports(): - aname = a.name - ai = i._preg_map[aname] - res.append(a.eq(ai)) - return res - - def ports(self): - res = [] - for aname, a in self._preg_map.items(): - if isinstance(a, Signal) or isinstance(a, ObjectProxy) or \ - isinstance(a, Record): - res.append(a) - #print ("ObjectPorts", res) - return res - - def __getattr__(self, name): - try: - v = self._preg_map[name] - return v - #return like(v, name, self._m) - except KeyError: - raise AttributeError( - 'error, no pipeline register "%s" defined for OP %s' - % (name, self.name)) - - def __setattr__(self, name, value): - if name.startswith('_') or name in ['name', 'ports', 'eq', 'like']: - # do not do anything tricky with variables starting with '_' - object.__setattr__(self, name, value) - return - #rname = "%s_%s" % (self.name, name) - rname = name - new_pipereg = like(value, rname, self._m, self._pipemode) - self._preg_map[name] = new_pipereg - #object.__setattr__(self, name, new_pipereg) - if self._pipemode: - #print ("OP pipemode", self._syncmode, new_pipereg, value) - assign = eq(new_pipereg, value) - if self._syncmode: - self._m.d.sync += assign - else: - self._m.d.comb += assign - elif self._m: - #print ("OP !pipemode assign", new_pipereg, value, type(value)) - self._m.d.comb += eq(new_pipereg, value) - else: - #print ("OP !pipemode !m", new_pipereg, value, type(value)) - 
self._assigns += eq(new_pipereg, value) - if isinstance(value, ObjectProxy): - #print ("OP, defer assigns:", value._assigns) - self._assigns += value._assigns - self._eqs.append(value._eqs) - - -class PipelineStage: - """ Pipeline builder stage with auto generation of pipeline registers. - """ - - def __init__(self, name, m, prev=None, pipemode=False, ispec=None): - self._m = m - self._stagename = name - self._preg_map = {'__nextstage__': {}} - self._prev_stage = prev - self._ispec = ispec - if ispec: - self._preg_map[self._stagename] = ispec - if prev: - print ("prev", prev._stagename, prev._preg_map) - #if prev._stagename in prev._preg_map: - # m = prev._preg_map[prev._stagename] - # self._preg_map[prev._stagename] = m - if '__nextstage__' in prev._preg_map: - m = prev._preg_map['__nextstage__'] - m = likedict(m) - self._preg_map[self._stagename] = m - #for k, v in m.items(): - #m[k] = like(v, k, self._m) - print ("make current", self._stagename, m) - self._pipemode = pipemode - self._eqs = {} - self._assigns = [] - - def __getattribute__(self, name): - if name.startswith('_'): - return object.__getattribute__(self, name) - #if name in self._preg_map['__nextstage__']: - # return self._preg_map['__nextstage__'][name] - try: - print ("getattr", name, object.__getattribute__(self, '_preg_map')) - v = self._preg_map[self._stagename][name] - return v - #return like(v, name, self._m) - except KeyError: - raise AttributeError( - 'error, no pipeline register "%s" defined for stage %s' - % (name, self._stagename)) - - def __setattr__(self, name, value): - if name.startswith('_'): - # do not do anything tricky with variables starting with '_' - object.__setattr__(self, name, value) - return - pipereg_id = self._stagename - rname = 'pipereg_' + pipereg_id + '_' + name - new_pipereg = like(value, rname, self._m, self._pipemode) - next_stage = '__nextstage__' - if next_stage not in self._preg_map: - self._preg_map[next_stage] = {} - self._preg_map[next_stage][name] = 
new_pipereg - print ("setattr", name, value, self._preg_map) - if self._pipemode: - self._eqs[name] = new_pipereg - assign = eq(new_pipereg, value) - print ("pipemode: append", new_pipereg, value, assign) - if isinstance(value, ObjectProxy): - print ("OP, assigns:", value._assigns) - self._assigns += value._assigns - self._eqs[name]._eqs = value._eqs - #self._m.d.comb += assign - self._assigns += assign - elif self._m: - print ("!pipemode: assign", new_pipereg, value) - assign = eq(new_pipereg, value) - self._m.d.sync += assign - else: - print ("!pipemode !m: defer assign", new_pipereg, value) - assign = eq(new_pipereg, value) - self._eqs[name] = new_pipereg - self._assigns += assign - if isinstance(value, ObjectProxy): - print ("OP, defer assigns:", value._assigns) - self._assigns += value._assigns - self._eqs[name]._eqs = value._eqs - -def likelist(specs): - res = [] - for v in specs: - res.append(like(v, v.name, None, pipemode=True)) - return res - -def likedict(specs): - if not isinstance(specs, dict): - return like(specs, specs.name, None, pipemode=True) - res = {} - for k, v in specs.items(): - res[k] = likedict(v) - return res - - -class AutoStage(StageCls): - def __init__(self, inspecs, outspecs, eqs, assigns): - self.inspecs, self.outspecs = inspecs, outspecs - self.eqs, self.assigns = eqs, assigns - #self.o = self.ospec() - def ispec(self): return likedict(self.inspecs) - def ospec(self): return likedict(self.outspecs) - - def process(self, i): - print ("stage process", i) - return self.eqs - - def setup(self, m, i): - print ("stage setup i", i, m) - print ("stage setup inspecs", self.inspecs) - print ("stage setup outspecs", self.outspecs) - print ("stage setup eqs", self.eqs) - #self.o = self.ospec() - m.d.comb += eq(self.inspecs, i) - #m.d.comb += eq(self.outspecs, self.eqs) - #m.d.comb += eq(self.o, i) - - -class AutoPipe(UnbufferedPipeline): - def __init__(self, stage, assigns): - UnbufferedPipeline.__init__(self, stage) - self.assigns = assigns - - 
def elaborate(self, platform): - m = UnbufferedPipeline.elaborate(self, platform) - m.d.comb += self.assigns - print ("assigns", self.assigns, m) - return m - - -class PipeManager: - def __init__(self, m, pipemode=False, pipetype=None): - self.m = m - self.pipemode = pipemode - self.pipetype = pipetype - - @contextmanager - def Stage(self, name, prev=None, ispec=None): - if ispec: - ispec = likedict(ispec) - print ("start stage", name, ispec) - stage = PipelineStage(name, None, prev, self.pipemode, ispec=ispec) - try: - yield stage, self.m #stage._m - finally: - pass - if self.pipemode: - if stage._ispec: - print ("use ispec", stage._ispec) - inspecs = stage._ispec - else: - inspecs = self.get_specs(stage, name) - #inspecs = likedict(inspecs) - outspecs = self.get_specs(stage, '__nextstage__', liked=True) - print ("stage inspecs", name, inspecs) - print ("stage outspecs", name, outspecs) - eqs = stage._eqs # get_eqs(stage._eqs) - assigns = get_assigns(stage._assigns) - print ("stage eqs", name, eqs) - print ("stage assigns", name, assigns) - s = AutoStage(inspecs, outspecs, eqs, assigns) - self.stages.append(s) - print ("end stage", name, self.pipemode, "\n") - - def get_specs(self, stage, name, liked=False): - return stage._preg_map[name] - if name in stage._preg_map: - res = [] - for k, v in stage._preg_map[name].items(): - #v = like(v, k, stage._m) - res.append(v) - #if isinstance(v, ObjectProxy): - # res += v.get_specs() - return res - return {} - - def __enter__(self): - self.stages = [] - return self - - def __exit__(self, *args): - print ("exit stage", args) - pipes = [] - cb = ControlBase() - for s in self.stages: - print ("stage specs", s, s.inspecs, s.outspecs) - if self.pipetype == 'buffered': - p = BufferedHandshake(s) - else: - p = AutoPipe(s, s.assigns) - pipes.append(p) - self.m.submodules += p - - self.m.d.comb += cb.connect(pipes) - - -class SimplePipeline: - """ Pipeline builder with auto generation of pipeline registers. 
- """ - - def __init__(self, m): - self._m = m - self._pipeline_register_map = {} - self._current_stage_num = 0 - - def _setup(self): - stage_list = [] - for method in dir(self): - if method.startswith('stage'): - stage_list.append(method) - for stage in sorted(stage_list): - stage_method = getattr(self, stage) - stage_method() - self._current_stage_num += 1 - - def __getattr__(self, name): - try: - return self._pipeline_register_map[self._current_stage_num][name] - except KeyError: - raise AttributeError( - 'error, no pipeline register "%s" defined for stage %d' - % (name, self._current_stage_num)) - - def __setattr__(self, name, value): - if name.startswith('_'): - # do not do anything tricky with variables starting with '_' - object.__setattr__(self, name, value) - return - next_stage = self._current_stage_num + 1 - pipereg_id = str(self._current_stage_num) + 'to' + str(next_stage) - rname = 'pipereg_' + pipereg_id + '_' + name - #new_pipereg = Signal(value_bits_sign(value), name=rname, - # reset_less=True) - if isinstance(value, ObjectProxy): - new_pipereg = ObjectProxy.like(self._m, value, - name=rname, reset_less = True) - else: - new_pipereg = Signal.like(value, name=rname, reset_less = True) - if next_stage not in self._pipeline_register_map: - self._pipeline_register_map[next_stage] = {} - self._pipeline_register_map[next_stage][name] = new_pipereg - self._m.d.sync += eq(new_pipereg, value) - diff --git a/src/add/pipeline_example.py b/src/add/pipeline_example.py deleted file mode 100644 index 799caf6d..00000000 --- a/src/add/pipeline_example.py +++ /dev/null @@ -1,204 +0,0 @@ -""" Example 5: Making use of PyRTL and Introspection. """ - -from nmigen import Module, Signal, Const -from nmigen.cli import main, verilog, rtlil - - -from pipeline import SimplePipeline, ObjectProxy, PipeManager - - -class SimplePipelineExample(SimplePipeline): - """ A very simple pipeline to show how registers are inferred. 
""" - - def __init__(self, pipe): - SimplePipeline.__init__(self, pipe) - self._loopback = Signal(4) - self._setup() - - def stage0(self): - self.n = ~self._loopback - - def stage1(self): - self.n = self.n + 2 - - def stage2(self): - localv = Signal(4) - self._pipe.comb += localv.eq(2) - self.n = self.n << localv - - def stage3(self): - self.n = ~self.n - - def stage4(self): - self._pipe.sync += self._loopback.eq(self.n + 3) - - -class ObjectBasedPipelineExample(SimplePipeline): - """ A very simple pipeline to show how registers are inferred. """ - - def __init__(self, m): - SimplePipeline.__init__(self, m) - self._loopback = Signal(4) - o = ObjectProxy(m) - o.a = Signal(4) - o.b = Signal(4) - self._obj = o - self._setup() - - def stage0(self): - self.n = ~self._loopback - self.o = self._obj - - def stage1(self): - self.n = self.n + self.o.a - o = ObjectProxy(self._m) - o.c = self.n - o.d = self.o.b + self.n + Const(5) - self.o = o - - def stage2(self): - localv = Signal(4) - self._m.d.comb += localv.eq(2) - self.n = self.n << localv - o = ObjectProxy(self._m) - o.e = self.n + self.o.c + self.o.d - self.o = o - - def stage3(self): - self.n = ~self.n - self.o = self.o - self.o.e = self.o.e + self.n - - def stage4(self): - self._m.d.sync += self._loopback.eq(self.n + 3 + self.o.e) - - -class PipeModule: - - def __init__(self): - self.m = Module() - self.p = ObjectBasedPipelineExample(self.m) - - def elaborate(self, platform=None): - return self.m - - -class PipelineStageExample: - - def __init__(self): - self._loopback = Signal(4, name="loopback") - - def elaborate(self, platform=None): - - m = Module() - - with PipeManager(m, pipemode=True) as pipe: - - ispec={'loopback': self._loopback} - with pipe.Stage("first", ispec=ispec) as (p, m): - p.n = ~p.loopback - with pipe.Stage("second", p) as (p, m): - #p.n = ~self._loopback + 2 - p.n = p.n + Const(2) - with pipe.Stage("third", p) as (p, m): - #p.n = ~self._loopback + 5 - localv = Signal(4) - m.d.comb += localv.eq(2) 
- p.n = p.n << localv + Const(1) - #p.m = p.n + 2 - - print (pipe.stages) - - return m - -class PipelineStageObjectExample: - - def __init__(self): - self.loopback = Signal(4) - - def elaborate(self, platform=None): - - m = Module() - - o = ObjectProxy(None, pipemode=False) - o.a = Signal(4) - o.b = Signal(4) - self.obj = o - - localv2 = Signal(4) - m.d.sync += localv2.eq(localv2 + 3) - - #m.d.comb += self.obj.a.eq(localv2 + 1) - #m.d.sync += self._loopback.eq(localv2) - - ispec= {'loopback': self.loopback, 'obj': self.obj} - with PipeManager(m, pipemode=True) as pipe: - - with pipe.Stage("first", ispec=ispec) as (p, m): - p.n = ~p.loopback - p.o = p.obj - with pipe.Stage("second", p) as (p, m): - #p.n = ~self.loopback + 2 - localn = Signal(4) - m.d.comb += localn.eq(p.n) - o = ObjectProxy(None, pipemode=False) - o.c = localn - o.d = p.o.b + localn + Const(5) - p.n = localn - p.o = o - with pipe.Stage("third", p) as (p, m): - #p.n = ~self._loopback + 5 - localv = Signal(4) - m.d.comb += localv.eq(2) - p.n = p.n << localv - o = ObjectProxy(None, pipemode=False) - o.e = p.n + p.o.c + p.o.d - p.o = o - - print ("stages", pipe.stages) - - return m - - -class PipelineStageObjectExample2: - - def __init__(self): - self._loopback = Signal(4) - - def elaborate(self, platform=None): - - m = Module() - - ispec= [self._loopback] - with PipeManager(m, pipemode=True) as pipe: - - with pipe.Stage("first", - ispec=ispec) as (p, m): - p.n = ~self._loopback - o = ObjectProxy(None, pipemode=False) - o.b = ~self._loopback + Const(5) - p.o = o - - print ("stages", pipe.stages) - - return m - - - -if __name__ == "__main__": - example = PipeModule() - with open("pipe_module.il", "w") as f: - f.write(rtlil.convert(example, ports=[ - example.p._loopback, - ])) - example = PipelineStageExample() - with open("pipe_stage_module.il", "w") as f: - f.write(rtlil.convert(example, ports=[ - example._loopback, - ])) - #exit(0) - example = PipelineStageObjectExample() - with 
open("pipe_stage_object_module.il", "w") as f: - f.write(rtlil.convert(example, ports=[ - example.loopback, - ])) diff --git a/src/add/queue.py b/src/add/queue.py deleted file mode 100644 index 0038953d..00000000 --- a/src/add/queue.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2014 - 2019 The Regents of the University of -# California (Regents). All Rights Reserved. Redistribution and use in -# source and binary forms, with or without modification, are permitted -# provided that the following conditions are met: -# * Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# two paragraphs of disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# two paragraphs of disclaimer in the documentation and/or other materials -# provided with the distribution. -# * Neither the name of the Regents nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, -# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, -# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF -# REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF -# ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION -# TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR -# MODIFICATIONS. 
- -from nmigen import Module, Signal, Memory, Mux, Elaboratable -from nmigen.tools import bits_for -from nmigen.cli import main -from nmigen.lib.fifo import FIFOInterface - -# translated from https://github.com/freechipsproject/chisel3/blob/a4a29e29c3f1eed18f851dcf10bdc845571dfcb6/src/main/scala/chisel3/util/Decoupled.scala#L185 # noqa - - -class Queue(FIFOInterface, Elaboratable): - def __init__(self, width, depth, fwft=True, pipe=False): - """ Queue (FIFO) with pipe mode and first-write fall-through capability - - * :width: width of Queue data in/out - * :depth: queue depth. NOTE: may be set to 0 (this is ok) - * :fwft : first-write, fall-through mode (Chisel Queue "flow" mode) - * :pipe : pipe mode. NOTE: this mode can cause unanticipated - problems. when read is enabled, so is writeable. - therefore if read is enabled, the data ABSOLUTELY MUST - be read. - - fwft mode = True basically means that the data may be transferred - combinatorially from input to output. - - Attributes: - * level: available free space (number of unread entries) - - din = enq_data, writable = enq_ready, we = enq_valid - dout = deq_data, re = deq_ready, readable = deq_valid - """ - FIFOInterface.__init__(self, width, depth, fwft) - self.pipe = pipe - self.depth = depth - self.level = Signal(bits_for(depth)) - - def elaborate(self, platform): - m = Module() - - # set up an SRAM. 
XXX bug in Memory: cannot create SRAM of depth 1 - ram = Memory(self.width, self.depth if self.depth > 1 else 2) - m.submodules.ram_read = ram_read = ram.read_port(synchronous=False) - m.submodules.ram_write = ram_write = ram.write_port() - - # convenience names - p_ready_o = self.writable - p_valid_i = self.we - enq_data = self.din - - n_valid_o = self.readable - n_ready_i = self.re - deq_data = self.dout - - # intermediaries - ptr_width = bits_for(self.depth - 1) if self.depth > 1 else 0 - enq_ptr = Signal(ptr_width) # cyclic pointer to "insert" point (wrport) - deq_ptr = Signal(ptr_width) # cyclic pointer to "remove" point (rdport) - maybe_full = Signal() # not reset_less (set by sync) - - # temporaries - do_enq = Signal(reset_less=True) - do_deq = Signal(reset_less=True) - ptr_diff = Signal(ptr_width) - ptr_match = Signal(reset_less=True) - empty = Signal(reset_less=True) - full = Signal(reset_less=True) - enq_max = Signal(reset_less=True) - deq_max = Signal(reset_less=True) - - m.d.comb += [ptr_match.eq(enq_ptr == deq_ptr), # read-ptr = write-ptr - ptr_diff.eq(enq_ptr - deq_ptr), - enq_max.eq(enq_ptr == self.depth - 1), - deq_max.eq(deq_ptr == self.depth - 1), - empty.eq(ptr_match & ~maybe_full), - full.eq(ptr_match & maybe_full), - do_enq.eq(p_ready_o & p_valid_i), # write conditions ok - do_deq.eq(n_ready_i & n_valid_o), # read conditions ok - - # set readable and writable (NOTE: see pipe mode below) - n_valid_o.eq(~empty), # cannot read if empty! - p_ready_o.eq(~full), # cannot write if full! 
- - # set up memory and connect to input and output - ram_write.addr.eq(enq_ptr), - ram_write.data.eq(enq_data), - ram_write.en.eq(do_enq), - ram_read.addr.eq(deq_ptr), - deq_data.eq(ram_read.data) # NOTE: overridden in fwft mode - ] - - # under write conditions, SRAM write-pointer moves on next clock - with m.If(do_enq): - m.d.sync += enq_ptr.eq(Mux(enq_max, 0, enq_ptr+1)) - - # under read conditions, SRAM read-pointer moves on next clock - with m.If(do_deq): - m.d.sync += deq_ptr.eq(Mux(deq_max, 0, deq_ptr+1)) - - # if read-but-not-write or write-but-not-read, maybe_full set - with m.If(do_enq != do_deq): - m.d.sync += maybe_full.eq(do_enq) - - # first-word fall-through: same as "flow" parameter in Chisel3 Queue - # basically instead of relying on the Memory characteristics (which - # in FPGAs do not have write-through), then when the queue is empty - # take the output directly from the input, i.e. *bypass* the SRAM. - # this done combinatorially to give the exact same characteristics - # as Memory "write-through"... without relying on a changing API - if self.fwft: - with m.If(p_valid_i): - m.d.comb += n_valid_o.eq(1) - with m.If(empty): - m.d.comb += deq_data.eq(enq_data) - m.d.comb += do_deq.eq(0) - with m.If(n_ready_i): - m.d.comb += do_enq.eq(0) - - # pipe mode: if next stage says it's ready (readable), we - # *must* declare the input ready (writeable). 
- if self.pipe: - with m.If(n_ready_i): - m.d.comb += p_ready_o.eq(1) - - # set the count (available free space), optimise on power-of-two - if self.depth == 1 << ptr_width: # is depth a power of 2 - m.d.comb += self.level.eq( - Mux(maybe_full & ptr_match, self.depth, 0) | ptr_diff) - else: - m.d.comb += self.level.eq(Mux(ptr_match, - Mux(maybe_full, self.depth, 0), - Mux(deq_ptr > enq_ptr, - self.depth + ptr_diff, - ptr_diff))) - - return m - - -if __name__ == "__main__": - reg_stage = Queue(1, 1, pipe=True) - break_ready_chain_stage = Queue(1, 1, pipe=True, fwft=True) - m = Module() - ports = [] - - def queue_ports(queue, name_prefix): - retval = [] - for name in ["level", - "dout", - "readable", - "writable"]: - port = getattr(queue, name) - signal = Signal(port.shape(), name=name_prefix+name) - m.d.comb += signal.eq(port) - retval.append(signal) - for name in ["re", - "din", - "we"]: - port = getattr(queue, name) - signal = Signal(port.shape(), name=name_prefix+name) - m.d.comb += port.eq(signal) - retval.append(signal) - return retval - - m.submodules.reg_stage = reg_stage - ports += queue_ports(reg_stage, "reg_stage_") - m.submodules.break_ready_chain_stage = break_ready_chain_stage - ports += queue_ports(break_ready_chain_stage, "break_ready_chain_stage_") - main(m, ports=ports) diff --git a/src/add/record_experiment.py b/src/add/record_experiment.py deleted file mode 100644 index 1789c3bd..00000000 --- a/src/add/record_experiment.py +++ /dev/null @@ -1,106 +0,0 @@ -from nmigen import Module, Signal, Mux, Const, Elaboratable -from nmigen.hdl.rec import Record, Layout, DIR_NONE -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen.compat.fhdl.bitcontainer import value_bits_sign -from singlepipe import cat, RecordObject - - -class RecordTest: - - def __init__(self): - self.r1 = RecordObject() - self.r1.sig1 = Signal(16) - self.r1.r2 = RecordObject() - self.r1.r2.sig2 = Signal(16) - self.r1.r3 = RecordObject() - 
self.r1.r3.sig3 = Signal(16) - self.sig123 = Signal(48) - - def elaborate(self, platform): - m = Module() - - sig1 = Signal(16) - m.d.comb += sig1.eq(self.r1.sig1) - sig2 = Signal(16) - m.d.comb += sig2.eq(self.r1.r2.sig2) - - print (self.r1.fields) - print (self.r1.shape()) - print ("width", len(self.r1)) - m.d.comb += self.sig123.eq(cat(self.r1)) - - return m - - -def testbench(dut): - yield dut.r1.sig1.eq(5) - yield dut.r1.r2.sig2.eq(10) - yield dut.r1.r3.sig3.eq(1) - - sig1 = yield dut.r1.sig1 - assert sig1 == 5 - sig2 = yield dut.r1.r2.sig2 - assert sig2 == 10 - - yield - - sig123 = yield dut.sig123 - print ("sig123", hex(sig123)) - assert sig123 == 0x1000a0005 - - - -class RecordTest2(Elaboratable): - - def __init__(self): - self.r1 = RecordObject() - self.r1.sig1 = Signal(16) - self.r1.r2 = RecordObject() - self.r1.r2.sig2 = Signal(16) - self.r1.r3 = RecordObject() - self.r1.r3.sig3 = Signal(16) - self.sig123 = Signal(48) - - def elaborate(self, platform): - m = Module() - - m.d.comb += cat(self.r1).eq(self.sig123) - - return m - - -def testbench2(dut): - - sig123 = yield dut.sig123.eq(0x1000a0005) - - yield - - sig1 = yield dut.r1.sig1 - assert sig1 == 5 - sig2 = yield dut.r1.r2.sig2 - assert sig2 == 10 - sig3 = yield dut.r1.r3.sig3 - assert sig3 == 1 - - - -###################################################################### -# Unit Tests -###################################################################### - -if __name__ == '__main__': - print ("test 1") - dut = RecordTest() - run_simulation(dut, testbench(dut), vcd_name="test_record1.vcd") - vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2]) - with open("test_record1.il", "w") as f: - f.write(vl) - - print ("test 2") - dut = RecordTest2() - run_simulation(dut, testbench2(dut), vcd_name="test_record2.vcd") - vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2]) - with open("test_record2.il", "w") as f: - f.write(vl) - diff --git a/src/add/rstation_row.py 
b/src/add/rstation_row.py deleted file mode 100644 index aeb58732..00000000 --- a/src/add/rstation_row.py +++ /dev/null @@ -1,39 +0,0 @@ -from nmigen import Signal, Cat, Const, Mux, Module - -from nmigen.cli import main, verilog - -from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase -from fpbase import MultiShiftRMerge - -class ReservationStationRow: - - def __init__(self, width, id_wid): - """ Reservation Station row - - * width: bit-width of IEEE754. supported: 16, 32, 64 - * id_wid: an identifier to be passed through to the FunctionUnit - """ - self.width = width - - self.in_a = Signal(width) - self.in_b = Signal(width) - self.id_wid = id_wid - self.out_z = Signal(width) - - def elaborate(self, platform=None): - """ creates the HDL code-fragment for ReservationStationRow - """ - m = Module() - - return m - - -if __name__ == "__main__": - rs = ReservationStationRow(width=32, id_wid=Const(1,4)) - main(alu, ports=[rs.in_a, rs.in_b, rs.out_z] - - # works... but don't use, just do "python fname.py convert -t v" - #print (verilog.convert(alu, ports=[ - # ports=alu.in_a.ports() + \ - # alu.in_b.ports() + \ - # alu.out_z.ports()) diff --git a/src/add/singlepipe.py b/src/add/singlepipe.py deleted file mode 100644 index 68b62e43..00000000 --- a/src/add/singlepipe.py +++ /dev/null @@ -1,829 +0,0 @@ -""" Pipeline API. For multi-input and multi-output variants, see multipipe. - - Associated development bugs: - * http://bugs.libre-riscv.org/show_bug.cgi?id=64 - * http://bugs.libre-riscv.org/show_bug.cgi?id=57 - - Important: see Stage API (stageapi.py) in combination with below - - RecordBasedStage: - ---------------- - - A convenience class that takes an input shape, output shape, a - "processing" function and an optional "setup" function. Honestly - though, there's not much more effort to just... create a class - that returns a couple of Records (see ExampleAddRecordStage in - examples). 
- - PassThroughStage: - ---------------- - - A convenience class that takes a single function as a parameter, - that is chain-called to create the exact same input and output spec. - It has a process() function that simply returns its input. - - Instances of this class are completely redundant if handed to - StageChain, however when passed to UnbufferedPipeline they - can be used to introduce a single clock delay. - - ControlBase: - ----------- - - The base class for pipelines. Contains previous and next ready/valid/data. - Also has an extremely useful "connect" function that can be used to - connect a chain of pipelines and present the exact same prev/next - ready/valid/data API. - - Note: pipelines basically do not become pipelines as such until - handed to a derivative of ControlBase. ControlBase itself is *not* - strictly considered a pipeline class. Wishbone and AXI4 (master or - slave) could be derived from ControlBase, for example. - UnbufferedPipeline: - ------------------ - - A simple stalling clock-synchronised pipeline that has no buffering - (unlike BufferedHandshake). Data flows on *every* clock cycle when - the conditions are right (this is nominally when the input is valid - and the output is ready). - - A stall anywhere along the line will result in a stall back-propagating - down the entire chain. The BufferedHandshake by contrast will buffer - incoming data, allowing previous stages one clock cycle's grace before - also having to stall. - - An advantage of the UnbufferedPipeline over the Buffered one is - that the amount of logic needed (number of gates) is greatly - reduced (no second set of buffers basically) - - The disadvantage of the UnbufferedPipeline is that the valid/ready - logic, if chained together, is *combinatorial*, resulting in - progressively larger gate delay. - - PassThroughHandshake: - ------------------ - - A Control class that introduces a single clock delay, passing its - data through unaltered. 
Unlike RegisterPipeline (which relies - on UnbufferedPipeline and PassThroughStage) it handles ready/valid - itself. - - RegisterPipeline: - ---------------- - - A convenience class that, because UnbufferedPipeline introduces a single - clock delay, when its stage is a PassThroughStage, it results in a Pipeline - stage that, duh, delays its (unmodified) input by one clock cycle. - - BufferedHandshake: - ---------------- - - nmigen implementation of buffered pipeline stage, based on zipcpu: - https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html - - this module requires quite a bit of thought to understand how it works - (and why it is needed in the first place). reading the above is - *strongly* recommended. - - unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires - the STB / ACK signals to raise and lower (on separate clocks) before - data may proceeed (thus only allowing one piece of data to proceed - on *ALTERNATE* cycles), the signalling here is a true pipeline - where data will flow on *every* clock when the conditions are right. - - input acceptance conditions are when: - * incoming previous-stage strobe (p.valid_i) is HIGH - * outgoing previous-stage ready (p.ready_o) is LOW - - output transmission conditions are when: - * outgoing next-stage strobe (n.valid_o) is HIGH - * outgoing next-stage ready (n.ready_i) is LOW - - the tricky bit is when the input has valid data and the output is not - ready to accept it. if it wasn't for the clock synchronisation, it - would be possible to tell the input "hey don't send that data, we're - not ready". unfortunately, it's not possible to "change the past": - the previous stage *has no choice* but to pass on its data. - - therefore, the incoming data *must* be accepted - and stored: that - is the responsibility / contract that this stage *must* accept. - on the same clock, it's possible to tell the input that it must - not send any more data. this is the "stall" condition. 
- - we now effectively have *two* possible pieces of data to "choose" from: - the buffered data, and the incoming data. the decision as to which - to process and output is based on whether we are in "stall" or not. - i.e. when the next stage is no longer ready, the output comes from - the buffer if a stall had previously occurred, otherwise it comes - direct from processing the input. - - this allows us to respect a synchronous "travelling STB" with what - dan calls a "buffered handshake". - - it's quite a complex state machine! - - SimpleHandshake - --------------- - - Synchronised pipeline, Based on: - https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v -""" - -from nmigen import Signal, Mux, Module, Elaboratable -from nmigen.cli import verilog, rtlil -from nmigen.hdl.rec import Record - -from queue import Queue -import inspect - -from iocontrol import (PrevControl, NextControl, Object, RecordObject) -from stageapi import (_spec, StageCls, Stage, StageChain, StageHelper) -import nmoperator - - -class RecordBasedStage(Stage): - """ convenience class which provides a Records-based layout. - honestly it's a lot easier just to create a direct Records-based - class (see ExampleAddRecordStage) - """ - def __init__(self, in_shape, out_shape, processfn, setupfn=None): - self.in_shape = in_shape - self.out_shape = out_shape - self.__process = processfn - self.__setup = setupfn - def ispec(self): return Record(self.in_shape) - def ospec(self): return Record(self.out_shape) - def process(seif, i): return self.__process(i) - def setup(seif, m, i): return self.__setup(m, i) - - -class PassThroughStage(StageCls): - """ a pass-through stage with its input data spec identical to its output, - and "passes through" its data from input to output (does nothing). - - use this basically to explicitly make any data spec Stage-compliant. - (many APIs would potentially use a static "wrap" method in e.g. 
- StageCls to achieve a similar effect) - """ - def __init__(self, iospecfn): self.iospecfn = iospecfn - def ispec(self): return self.iospecfn() - def ospec(self): return self.iospecfn() - - -class ControlBase(StageHelper, Elaboratable): - """ Common functions for Pipeline API. Note: a "pipeline stage" only - exists (conceptually) when a ControlBase derivative is handed - a Stage (combinatorial block) - - NOTE: ControlBase derives from StageHelper, making it accidentally - compliant with the Stage API. Using those functions directly - *BYPASSES* a ControlBase instance ready/valid signalling, which - clearly should not be done without a really, really good reason. - """ - def __init__(self, stage=None, in_multi=None, stage_ctl=False): - """ Base class containing ready/valid/data to previous and next stages - - * p: contains ready/valid to the previous stage - * n: contains ready/valid to the next stage - - Except when calling Controlbase.connect(), user must also: - * add data_i member to PrevControl (p) and - * add data_o member to NextControl (n) - Calling ControlBase._new_data is a good way to do that. - """ - StageHelper.__init__(self, stage) - - # set up input and output IO ACK (prev/next ready/valid) - self.p = PrevControl(in_multi, stage_ctl) - self.n = NextControl(stage_ctl) - - # set up the input and output data - if stage is not None: - self._new_data("data") - - def _new_data(self, name): - """ allocates new data_i and data_o - """ - self.p.data_i, self.n.data_o = self.new_specs(name) - - @property - def data_r(self): - return self.process(self.p.data_i) - - def connect_to_next(self, nxt): - """ helper function to connect to the next stage data/valid/ready. - """ - return self.n.connect_to_next(nxt.p) - - def _connect_in(self, prev): - """ internal helper function to connect stage to an input source. - do not use to connect stage-to-stage! 
- """ - return self.p._connect_in(prev.p) - - def _connect_out(self, nxt): - """ internal helper function to connect stage to an output source. - do not use to connect stage-to-stage! - """ - return self.n._connect_out(nxt.n) - - def connect(self, pipechain): - """ connects a chain (list) of Pipeline instances together and - links them to this ControlBase instance: - - in <----> self <---> out - | ^ - v | - [pipe1, pipe2, pipe3, pipe4] - | ^ | ^ | ^ - v | v | v | - out---in out--in out---in - - Also takes care of allocating data_i/data_o, by looking up - the data spec for each end of the pipechain. i.e It is NOT - necessary to allocate self.p.data_i or self.n.data_o manually: - this is handled AUTOMATICALLY, here. - - Basically this function is the direct equivalent of StageChain, - except that unlike StageChain, the Pipeline logic is followed. - - Just as StageChain presents an object that conforms to the - Stage API from a list of objects that also conform to the - Stage API, an object that calls this Pipeline connect function - has the exact same pipeline API as the list of pipline objects - it is called with. - - Thus it becomes possible to build up larger chains recursively. - More complex chains (multi-input, multi-output) will have to be - done manually. 
- - Argument: - - * :pipechain: - a sequence of ControlBase-derived classes - (must be one or more in length) - - Returns: - - * a list of eq assignments that will need to be added in - an elaborate() to m.d.comb - """ - assert len(pipechain) > 0, "pipechain must be non-zero length" - assert self.stage is None, "do not use connect with a stage" - eqs = [] # collated list of assignment statements - - # connect inter-chain - for i in range(len(pipechain)-1): - pipe1 = pipechain[i] # earlier - pipe2 = pipechain[i+1] # later (by 1) - eqs += pipe1.connect_to_next(pipe2) # earlier n to later p - - # connect front and back of chain to ourselves - front = pipechain[0] # first in chain - end = pipechain[-1] # last in chain - self.set_specs(front, end) # sets up ispec/ospec functions - self._new_data("chain") # NOTE: REPLACES existing data - eqs += front._connect_in(self) # front p to our p - eqs += end._connect_out(self) # end n to our n - - return eqs - - def set_input(self, i): - """ helper function to set the input data (used in unit tests) - """ - return nmoperator.eq(self.p.data_i, i) - - def __iter__(self): - yield from self.p # yields ready/valid/data (data also gets yielded) - yield from self.n # ditto - - def ports(self): - return list(self) - - def elaborate(self, platform): - """ handles case where stage has dynamic ready/valid functions - """ - m = Module() - m.submodules.p = self.p - m.submodules.n = self.n - - self.setup(m, self.p.data_i) - - if not self.p.stage_ctl: - return m - - # intercept the previous (outgoing) "ready", combine with stage ready - m.d.comb += self.p.s_ready_o.eq(self.p._ready_o & self.stage.d_ready) - - # intercept the next (incoming) "ready" and combine it with data valid - sdv = self.stage.d_valid(self.n.ready_i) - m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv) - - return m - - -class BufferedHandshake(ControlBase): - """ buffered pipeline stage. data and strobe signals travel in sync. 
- if ever the input is ready and the output is not, processed data - is shunted in a temporary register. - - Argument: stage. see Stage API above - - stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 - stage-1 p.ready_o <>in stage n.data_o out>> stage+1 - | | - process --->----^ - | | - +-- r_data ->-+ - - input data p.data_i is read (only), is processed and goes into an - intermediate result store [process()]. this is updated combinatorially. - - in a non-stall condition, the intermediate result will go into the - output (update_output). however if ever there is a stall, it goes - into r_data instead [update_buffer()]. - - when the non-stall condition is released, r_data is the first - to be transferred to the output [flush_buffer()], and the stall - condition cleared. - - on the next cycle (as long as stall is not raised again) the - input may begin to be processed and transferred directly to output. - """ - - def elaborate(self, platform): - self.m = ControlBase.elaborate(self, platform) - - result = _spec(self.stage.ospec, "r_tmp") - r_data = _spec(self.stage.ospec, "r_data") - - # establish some combinatorial temporaries - o_n_validn = Signal(reset_less=True) - n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") - nir_por = Signal(reset_less=True) - nir_por_n = Signal(reset_less=True) - p_valid_i = Signal(reset_less=True) - nir_novn = Signal(reset_less=True) - nirn_novn = Signal(reset_less=True) - por_pivn = Signal(reset_less=True) - npnn = Signal(reset_less=True) - self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test), - o_n_validn.eq(~self.n.valid_o), - n_ready_i.eq(self.n.ready_i_test), - nir_por.eq(n_ready_i & self.p._ready_o), - nir_por_n.eq(n_ready_i & ~self.p._ready_o), - nir_novn.eq(n_ready_i | o_n_validn), - nirn_novn.eq(~n_ready_i & o_n_validn), - npnn.eq(nir_por | nirn_novn), - por_pivn.eq(self.p._ready_o & ~p_valid_i) - ] - - # store result of processing in combinatorial temporary - self.m.d.comb += nmoperator.eq(result, self.data_r) - - # if 
not in stall condition, update the temporary register - with self.m.If(self.p.ready_o): # not stalled - self.m.d.sync += nmoperator.eq(r_data, result) # update buffer - - # data pass-through conditions - with self.m.If(npnn): - data_o = self._postprocess(result) # XXX TBD, does nothing right now - self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid - nmoperator.eq(self.n.data_o, data_o), # update out - ] - # buffer flush conditions (NOTE: can override data passthru conditions) - with self.m.If(nir_por_n): # not stalled - # Flush the [already processed] buffer to the output port. - data_o = self._postprocess(r_data) # XXX TBD, does nothing right now - self.m.d.sync += [self.n.valid_o.eq(1), # reg empty - nmoperator.eq(self.n.data_o, data_o), # flush - ] - # output ready conditions - self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn) - - return self.m - - -class SimpleHandshake(ControlBase): - """ simple handshake control. data and strobe signals travel in sync. - implements the protocol used by Wishbone and AXI4. - - Argument: stage. 
see Stage API above - - stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 - stage-1 p.ready_o <>in stage n.data_o out>> stage+1 - | | - +--process->--^ - Truth Table - - Inputs Temporary Output Data - ------- ---------- ----- ---- - P P N N PiV& ~NiR& N P - i o i o PoR NoV o o - V R R V V R - - ------- - - - - - 0 0 0 0 0 0 >0 0 reg - 0 0 0 1 0 1 >1 0 reg - 0 0 1 0 0 0 0 1 process(data_i) - 0 0 1 1 0 0 0 1 process(data_i) - ------- - - - - - 0 1 0 0 0 0 >0 0 reg - 0 1 0 1 0 1 >1 0 reg - 0 1 1 0 0 0 0 1 process(data_i) - 0 1 1 1 0 0 0 1 process(data_i) - ------- - - - - - 1 0 0 0 0 0 >0 0 reg - 1 0 0 1 0 1 >1 0 reg - 1 0 1 0 0 0 0 1 process(data_i) - 1 0 1 1 0 0 0 1 process(data_i) - ------- - - - - - 1 1 0 0 1 0 1 0 process(data_i) - 1 1 0 1 1 1 1 0 process(data_i) - 1 1 1 0 1 0 1 1 process(data_i) - 1 1 1 1 1 0 1 1 process(data_i) - ------- - - - - - """ - - def elaborate(self, platform): - self.m = m = ControlBase.elaborate(self, platform) - - r_busy = Signal() - result = _spec(self.stage.ospec, "r_tmp") - - # establish some combinatorial temporaries - n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") - p_valid_i_p_ready_o = Signal(reset_less=True) - p_valid_i = Signal(reset_less=True) - m.d.comb += [p_valid_i.eq(self.p.valid_i_test), - n_ready_i.eq(self.n.ready_i_test), - p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o), - ] - - # store result of processing in combinatorial temporary - m.d.comb += nmoperator.eq(result, self.data_r) - - # previous valid and ready - with m.If(p_valid_i_p_ready_o): - data_o = self._postprocess(result) # XXX TBD, does nothing right now - m.d.sync += [r_busy.eq(1), # output valid - nmoperator.eq(self.n.data_o, data_o), # update output - ] - # previous invalid or not ready, however next is accepting - with m.Elif(n_ready_i): - data_o = self._postprocess(result) # XXX TBD, does nothing right now - m.d.sync += [nmoperator.eq(self.n.data_o, data_o)] - # TODO: could still send data here (if there was any) - #m.d.sync += 
self.n.valid_o.eq(0) # ...so set output invalid - m.d.sync += r_busy.eq(0) # ...so set output invalid - - m.d.comb += self.n.valid_o.eq(r_busy) - # if next is ready, so is previous - m.d.comb += self.p._ready_o.eq(n_ready_i) - - return self.m - - -class UnbufferedPipeline(ControlBase): - """ A simple pipeline stage with single-clock synchronisation - and two-way valid/ready synchronised signalling. - - Note that a stall in one stage will result in the entire pipeline - chain stalling. - - Also that unlike BufferedHandshake, the valid/ready signalling does NOT - travel synchronously with the data: the valid/ready signalling - combines in a *combinatorial* fashion. Therefore, a long pipeline - chain will lengthen propagation delays. - - Argument: stage. see Stage API, above - - stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 - stage-1 p.ready_o <>in stage n.data_o out>> stage+1 - | | - r_data result - | | - +--process ->-+ - - Attributes: - ----------- - p.data_i : StageInput, shaped according to ispec - The pipeline input - p.data_o : StageOutput, shaped according to ospec - The pipeline output - r_data : input_shape according to ispec - A temporary (buffered) copy of a prior (valid) input. - This is HELD if the output is not ready. It is updated - SYNCHRONOUSLY. - result: output_shape according to ospec - The output of the combinatorial logic. it is updated - COMBINATORIALLY (no clock dependence). 
- - Truth Table - - Inputs Temp Output Data - ------- - ----- ---- - P P N N ~NiR& N P - i o i o NoV o o - V R R V V R - - ------- - - - - 0 0 0 0 0 0 1 reg - 0 0 0 1 1 1 0 reg - 0 0 1 0 0 0 1 reg - 0 0 1 1 0 0 1 reg - ------- - - - - 0 1 0 0 0 0 1 reg - 0 1 0 1 1 1 0 reg - 0 1 1 0 0 0 1 reg - 0 1 1 1 0 0 1 reg - ------- - - - - 1 0 0 0 0 1 1 reg - 1 0 0 1 1 1 0 reg - 1 0 1 0 0 1 1 reg - 1 0 1 1 0 1 1 reg - ------- - - - - 1 1 0 0 0 1 1 process(data_i) - 1 1 0 1 1 1 0 process(data_i) - 1 1 1 0 0 1 1 process(data_i) - 1 1 1 1 0 1 1 process(data_i) - ------- - - - - - Note: PoR is *NOT* involved in the above decision-making. - """ - - def elaborate(self, platform): - self.m = m = ControlBase.elaborate(self, platform) - - data_valid = Signal() # is data valid or not - r_data = _spec(self.stage.ospec, "r_tmp") # output type - - # some temporaries - p_valid_i = Signal(reset_less=True) - pv = Signal(reset_less=True) - buf_full = Signal(reset_less=True) - m.d.comb += p_valid_i.eq(self.p.valid_i_test) - m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o) - m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid) - - m.d.comb += self.n.valid_o.eq(data_valid) - m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test) - m.d.sync += data_valid.eq(p_valid_i | buf_full) - - with m.If(pv): - m.d.sync += nmoperator.eq(r_data, self.data_r) - data_o = self._postprocess(r_data) # XXX TBD, does nothing right now - m.d.comb += nmoperator.eq(self.n.data_o, data_o) - - return self.m - -class UnbufferedPipeline2(ControlBase): - """ A simple pipeline stage with single-clock synchronisation - and two-way valid/ready synchronised signalling. - - Note that a stall in one stage will result in the entire pipeline - chain stalling. - - Also that unlike BufferedHandshake, the valid/ready signalling does NOT - travel synchronously with the data: the valid/ready signalling - combines in a *combinatorial* fashion. Therefore, a long pipeline - chain will lengthen propagation delays. 
- - Argument: stage. see Stage API, above - - stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 - stage-1 p.ready_o <>in stage n.data_o out>> stage+1 - | | | - +- process-> buf <-+ - Attributes: - ----------- - p.data_i : StageInput, shaped according to ispec - The pipeline input - p.data_o : StageOutput, shaped according to ospec - The pipeline output - buf : output_shape according to ospec - A temporary (buffered) copy of a valid output - This is HELD if the output is not ready. It is updated - SYNCHRONOUSLY. - - Inputs Temp Output Data - ------- - ----- - P P N N ~NiR& N P (buf_full) - i o i o NoV o o - V R R V V R - - ------- - - - - 0 0 0 0 0 0 1 process(data_i) - 0 0 0 1 1 1 0 reg (odata, unchanged) - 0 0 1 0 0 0 1 process(data_i) - 0 0 1 1 0 0 1 process(data_i) - ------- - - - - 0 1 0 0 0 0 1 process(data_i) - 0 1 0 1 1 1 0 reg (odata, unchanged) - 0 1 1 0 0 0 1 process(data_i) - 0 1 1 1 0 0 1 process(data_i) - ------- - - - - 1 0 0 0 0 1 1 process(data_i) - 1 0 0 1 1 1 0 reg (odata, unchanged) - 1 0 1 0 0 1 1 process(data_i) - 1 0 1 1 0 1 1 process(data_i) - ------- - - - - 1 1 0 0 0 1 1 process(data_i) - 1 1 0 1 1 1 0 reg (odata, unchanged) - 1 1 1 0 0 1 1 process(data_i) - 1 1 1 1 0 1 1 process(data_i) - ------- - - - - - Note: PoR is *NOT* involved in the above decision-making. 
- """ - - def elaborate(self, platform): - self.m = m = ControlBase.elaborate(self, platform) - - buf_full = Signal() # is data valid or not - buf = _spec(self.stage.ospec, "r_tmp") # output type - - # some temporaries - p_valid_i = Signal(reset_less=True) - m.d.comb += p_valid_i.eq(self.p.valid_i_test) - - m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i) - m.d.comb += self.p._ready_o.eq(~buf_full) - m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o) - - data_o = Mux(buf_full, buf, self.data_r) - data_o = self._postprocess(data_o) # XXX TBD, does nothing right now - m.d.comb += nmoperator.eq(self.n.data_o, data_o) - m.d.sync += nmoperator.eq(buf, self.n.data_o) - - return self.m - - -class PassThroughHandshake(ControlBase): - """ A control block that delays by one clock cycle. - - Inputs Temporary Output Data - ------- ------------------ ----- ---- - P P N N PiV& PiV| NiR| pvr N P (pvr) - i o i o PoR ~PoR ~NoV o o - V R R V V R - - ------- - - - - - - - 0 0 0 0 0 1 1 0 1 1 odata (unchanged) - 0 0 0 1 0 1 0 0 1 0 odata (unchanged) - 0 0 1 0 0 1 1 0 1 1 odata (unchanged) - 0 0 1 1 0 1 1 0 1 1 odata (unchanged) - ------- - - - - - - - 0 1 0 0 0 0 1 0 0 1 odata (unchanged) - 0 1 0 1 0 0 0 0 0 0 odata (unchanged) - 0 1 1 0 0 0 1 0 0 1 odata (unchanged) - 0 1 1 1 0 0 1 0 0 1 odata (unchanged) - ------- - - - - - - - 1 0 0 0 0 1 1 1 1 1 process(in) - 1 0 0 1 0 1 0 0 1 0 odata (unchanged) - 1 0 1 0 0 1 1 1 1 1 process(in) - 1 0 1 1 0 1 1 1 1 1 process(in) - ------- - - - - - - - 1 1 0 0 1 1 1 1 1 1 process(in) - 1 1 0 1 1 1 0 0 1 0 odata (unchanged) - 1 1 1 0 1 1 1 1 1 1 process(in) - 1 1 1 1 1 1 1 1 1 1 process(in) - ------- - - - - - - - - """ - - def elaborate(self, platform): - self.m = m = ControlBase.elaborate(self, platform) - - r_data = _spec(self.stage.ospec, "r_tmp") # output type - - # temporaries - p_valid_i = Signal(reset_less=True) - pvr = Signal(reset_less=True) - m.d.comb += p_valid_i.eq(self.p.valid_i_test) - m.d.comb += pvr.eq(p_valid_i & 
self.p.ready_o) - - m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test) - m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o) - - odata = Mux(pvr, self.data_r, r_data) - m.d.sync += nmoperator.eq(r_data, odata) - r_data = self._postprocess(r_data) # XXX TBD, does nothing right now - m.d.comb += nmoperator.eq(self.n.data_o, r_data) - - return m - - -class RegisterPipeline(UnbufferedPipeline): - """ A pipeline stage that delays by one clock cycle, creating a - sync'd latch out of data_o and valid_o as an indirect byproduct - of using PassThroughStage - """ - def __init__(self, iospecfn): - UnbufferedPipeline.__init__(self, PassThroughStage(iospecfn)) - - -class FIFOControl(ControlBase): - """ FIFO Control. Uses Queue to store data, coincidentally - happens to have same valid/ready signalling as Stage API. - - data_i -> fifo.din -> FIFO -> fifo.dout -> data_o - """ - def __init__(self, depth, stage, in_multi=None, stage_ctl=False, - fwft=True, pipe=False): - """ FIFO Control - - * :depth: number of entries in the FIFO - * :stage: data processing block - * :fwft: first word fall-thru mode (non-fwft introduces delay) - * :pipe: specifies pipe mode. - - when fwft = True it indicates that transfers may occur - combinatorially through stage processing in the same clock cycle. - This requires that the Stage be a Moore FSM: - https://en.wikipedia.org/wiki/Moore_machine - - when fwft = False it indicates that all output signals are - produced only from internal registers or memory, i.e. that the - Stage is a Mealy FSM: - https://en.wikipedia.org/wiki/Mealy_machine - - data is processed (and located) as follows: - - self.p self.stage temp fn temp fn temp fp self.n - data_i->process()->result->cat->din.FIFO.dout->cat(data_o) - - yes, really: cat produces a Cat() which can be assigned to. 
- this is how the FIFO gets de-catted without needing a de-cat - function - """ - self.fwft = fwft - self.pipe = pipe - self.fdepth = depth - ControlBase.__init__(self, stage, in_multi, stage_ctl) - - def elaborate(self, platform): - self.m = m = ControlBase.elaborate(self, platform) - - # make a FIFO with a signal of equal width to the data_o. - (fwidth, _) = nmoperator.shape(self.n.data_o) - fifo = Queue(fwidth, self.fdepth, fwft=self.fwft, pipe=self.pipe) - m.submodules.fifo = fifo - - def processfn(data_i): - # store result of processing in combinatorial temporary - result = _spec(self.stage.ospec, "r_temp") - m.d.comb += nmoperator.eq(result, self.process(data_i)) - return nmoperator.cat(result) - - ## prev: make the FIFO (Queue object) "look" like a PrevControl... - m.submodules.fp = fp = PrevControl() - fp.valid_i, fp._ready_o, fp.data_i = fifo.we, fifo.writable, fifo.din - m.d.comb += fp._connect_in(self.p, fn=processfn) - - # next: make the FIFO (Queue object) "look" like a NextControl... - m.submodules.fn = fn = NextControl() - fn.valid_o, fn.ready_i, fn.data_o = fifo.readable, fifo.re, fifo.dout - connections = fn._connect_out(self.n, fn=nmoperator.cat) - - # ok ok so we can't just do the ready/valid eqs straight: - # first 2 from connections are the ready/valid, 3rd is data. - if self.fwft: - m.d.comb += connections[:2] # combinatorial on next ready/valid - else: - m.d.sync += connections[:2] # non-fwft mode needs sync - data_o = connections[2] # get the data - data_o = self._postprocess(data_o) # XXX TBD, does nothing right now - m.d.comb += data_o - - return m - - -# aka "RegStage". 
-class UnbufferedPipeline(FIFOControl): - def __init__(self, stage, in_multi=None, stage_ctl=False): - FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl, - fwft=True, pipe=False) - -# aka "BreakReadyStage" XXX had to set fwft=True to get it to work -class PassThroughHandshake(FIFOControl): - def __init__(self, stage, in_multi=None, stage_ctl=False): - FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl, - fwft=True, pipe=True) - -# this is *probably* BufferedHandshake, although test #997 now succeeds. -class BufferedHandshake(FIFOControl): - def __init__(self, stage, in_multi=None, stage_ctl=False): - FIFOControl.__init__(self, 2, stage, in_multi, stage_ctl, - fwft=True, pipe=False) - - -""" -# this is *probably* SimpleHandshake (note: memory cell size=0) -class SimpleHandshake(FIFOControl): - def __init__(self, stage, in_multi=None, stage_ctl=False): - FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl, - fwft=True, pipe=False) -""" diff --git a/src/add/stageapi.py b/src/add/stageapi.py deleted file mode 100644 index 9651bf79..00000000 --- a/src/add/stageapi.py +++ /dev/null @@ -1,271 +0,0 @@ -""" Stage API - - Associated development bugs: - * http://bugs.libre-riscv.org/show_bug.cgi?id=64 - * http://bugs.libre-riscv.org/show_bug.cgi?id=57 - - Stage API: - --------- - - stage requires compliance with a strict API that may be - implemented in several means, including as a static class. - - Stages do not HOLD data, and they definitely do not contain - signalling (ready/valid). They do however specify the FORMAT - of the incoming and outgoing data, and they provide a means to - PROCESS that data (from incoming format to outgoing format). - - Stage Blocks really should be combinatorial blocks (Moore FSMs). 
- It would be ok to have input come in from sync'd sources - (clock-driven, Mealy FSMs) however by doing so they would no longer - be deterministic, and chaining such blocks with such side-effects - together could result in unexpected, unpredictable, unreproduceable - behaviour. - - So generally to be avoided, then unless you know what you are doing. - https://en.wikipedia.org/wiki/Moore_machine - https://en.wikipedia.org/wiki/Mealy_machine - - the methods of a stage instance must be as follows: - - * ispec() - Input data format specification. Takes a bit of explaining. - The requirements are: something that eventually derives from - nmigen Value must be returned *OR* an iterator or iterable - or sequence (list, tuple etc.) or generator must *yield* - thing(s) that (eventually) derive from the nmigen Value class. - - Complex to state, very simple in practice: - see test_buf_pipe.py for over 25 worked examples. - - * ospec() - Output data format specification. - format requirements identical to ispec. - - * process(m, i) - Optional function for processing ispec-formatted data. - returns a combinatorial block of a result that - may be assigned to the output, by way of the "nmoperator.eq" - function. Note that what is returned here can be - extremely flexible. Even a dictionary can be returned - as long as it has fields that match precisely with the - Record into which its values is intended to be assigned. - Again: see example unit tests for details. - - * setup(m, i) - Optional function for setting up submodules. - may be used for more complex stages, to link - the input (i) to submodules. must take responsibility - for adding those submodules to the module (m). - the submodules must be combinatorial blocks and - must have their inputs and output linked combinatorially. 
- - Both StageCls (for use with non-static classes) and Stage (for use - by static classes) are abstract classes from which, for convenience - and as a courtesy to other developers, anything conforming to the - Stage API may *choose* to derive. See Liskov Substitution Principle: - https://en.wikipedia.org/wiki/Liskov_substitution_principle - - StageChain: - ---------- - - A useful combinatorial wrapper around stages that chains them together - and then presents a Stage-API-conformant interface. By presenting - the same API as the stages it wraps, it can clearly be used recursively. - - StageHelper: - ---------- - - A convenience wrapper around a Stage-API-compliant "thing" which - complies with the Stage API and provides mandatory versions of - all the optional bits. -""" - -from abc import ABCMeta, abstractmethod -import inspect - -import nmoperator - - -def _spec(fn, name=None): - """ useful function that determines if "fn" has an argument "name". - if so, fn(name) is called otherwise fn() is called. - - means that ispec and ospec can be declared with *or without* - a name argument. normally it would be necessary to have - "ispec(name=None)" to achieve the same effect. - """ - if name is None: - return fn() - varnames = dict(inspect.getmembers(fn.__code__))['co_varnames'] - if 'name' in varnames: - return fn(name=name) - return fn() - - -class StageCls(metaclass=ABCMeta): - """ Class-based "Stage" API. requires instantiation (after derivation) - - see "Stage API" above.. Note: python does *not* require derivation - from this class. All that is required is that the pipelines *have* - the functions listed in this class. Derivation from this class - is therefore merely a "courtesy" to maintainers. 
- """ - @abstractmethod - def ispec(self): pass # REQUIRED - @abstractmethod - def ospec(self): pass # REQUIRED - #@abstractmethod - #def setup(self, m, i): pass # OPTIONAL - #@abstractmethod - #def process(self, i): pass # OPTIONAL - - -class Stage(metaclass=ABCMeta): - """ Static "Stage" API. does not require instantiation (after derivation) - - see "Stage API" above. Note: python does *not* require derivation - from this class. All that is required is that the pipelines *have* - the functions listed in this class. Derivation from this class - is therefore merely a "courtesy" to maintainers. - """ - @staticmethod - @abstractmethod - def ispec(): pass - - @staticmethod - @abstractmethod - def ospec(): pass - - #@staticmethod - #@abstractmethod - #def setup(m, i): pass - - #@staticmethod - #@abstractmethod - #def process(i): pass - - -class StageHelper(Stage): - """ a convenience wrapper around something that is Stage-API-compliant. - (that "something" may be a static class, for example). 
- - StageHelper happens to also be compliant with the Stage API, - it differs from the stage that it wraps in that all the "optional" - functions are provided (hence the designation "convenience wrapper") - """ - def __init__(self, stage): - self.stage = stage - self._ispecfn = None - self._ospecfn = None - if stage is not None: - self.set_specs(self, self) - - def ospec(self, name): - assert self._ospecfn is not None - return _spec(self._ospecfn, name) - - def ispec(self, name): - assert self._ispecfn is not None - return _spec(self._ispecfn, name) - - def set_specs(self, p, n): - """ sets up the ispecfn and ospecfn for getting input and output data - """ - if hasattr(p, "stage"): - p = p.stage - if hasattr(n, "stage"): - n = n.stage - self._ispecfn = p.ispec - self._ospecfn = n.ospec - - def new_specs(self, name): - """ allocates new ispec and ospec pair - """ - return (_spec(self.ispec, "%s_i" % name), - _spec(self.ospec, "%s_o" % name)) - - def process(self, i): - if self.stage and hasattr(self.stage, "process"): - return self.stage.process(i) - return i - - def setup(self, m, i): - if self.stage is not None and hasattr(self.stage, "setup"): - self.stage.setup(m, i) - - def _postprocess(self, i): # XXX DISABLED - return i # RETURNS INPUT - if hasattr(self.stage, "postprocess"): - return self.stage.postprocess(i) - return i - - -class StageChain(StageHelper): - """ pass in a list of stages, and they will automatically be - chained together via their input and output specs into a - combinatorial chain, to create one giant combinatorial block. - - the end result basically conforms to the exact same Stage API. - - * input to this class will be the input of the first stage - * output of first stage goes into input of second - * output of second goes into input into third - * ... (etc. etc.) 
- * the output of this class will be the output of the last stage - - NOTE: whilst this is very similar to ControlBase.connect(), it is - *really* important to appreciate that StageChain is pure - combinatorial and bypasses (does not involve, at all, ready/valid - signalling of any kind). - - ControlBase.connect on the other hand respects, connects, and uses - ready/valid signalling. - - Arguments: - - * :chain: a chain of combinatorial blocks conforming to the Stage API - NOTE: StageChain.ispec and ospect have to have something - to return (beginning and end specs of the chain), - therefore the chain argument must be non-zero length - - * :specallocate: if set, new input and output data will be allocated - and connected (eq'd) to each chained Stage. - in some cases if this is not done, the nmigen warning - "driving from two sources, module is being flattened" - will be issued. - - NOTE: do NOT use StageChain with combinatorial blocks that have - side-effects (state-based / clock-based input) or conditional - (inter-chain) dependencies, unless you really know what you are doing. 
- """ - def __init__(self, chain, specallocate=False): - assert len(chain) > 0, "stage chain must be non-zero length" - self.chain = chain - StageHelper.__init__(self, None) - self.setup = self._sa_setup if specallocate else self._na_setup - self.set_specs(self.chain[0], self.chain[-1]) - - def _sa_setup(self, m, i): - for (idx, c) in enumerate(self.chain): - if hasattr(c, "setup"): - c.setup(m, i) # stage may have some module stuff - ofn = self.chain[idx].ospec # last assignment survives - o = _spec(ofn, 'chainin%d' % idx) - m.d.comb += nmoperator.eq(o, c.process(i)) # process input into "o" - if idx == len(self.chain)-1: - break - ifn = self.chain[idx+1].ispec # new input on next loop - i = _spec(ifn, 'chainin%d' % (idx+1)) - m.d.comb += nmoperator.eq(i, o) # assign to next input - self.o = o - return self.o # last loop is the output - - def _na_setup(self, m, i): - for (idx, c) in enumerate(self.chain): - if hasattr(c, "setup"): - c.setup(m, i) # stage may have some module stuff - i = o = c.process(i) # store input into "o" - self.o = o - return self.o # last loop is the output - - def process(self, i): - return self.o # conform to Stage API: return last-loop output - - diff --git a/src/add/test_add.py b/src/add/test_add.py deleted file mode 100644 index 989cf482..00000000 --- a/src/add/test_add.py +++ /dev/null @@ -1,78 +0,0 @@ -from operator import add - -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation - -from nmigen_add_experiment import FPADD - -from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan, - is_inf, is_pos_inf, is_neg_inf, - match, get_rs_case, check_rs_case, run_test, - run_edge_cases, run_corner_cases) - -def testbench(dut): - yield from check_rs_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1) - yield from check_rs_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002) - yield from check_rs_case(dut, 0x00000047, 0x80000048, 0x80000001) - yield from check_rs_case(dut, 0x000116C2, 0x8001170A, 0x80000048) - 
yield from check_rs_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33) - yield from check_rs_case(dut, 0, 0, 0) - yield from check_rs_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000) - yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) - #yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) - yield from check_rs_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000) - yield from check_rs_case(dut, 0x42540000, 0xC2540000, 0x00000000) - yield from check_rs_case(dut, 0xC2540000, 0x42540000, 0x00000000) - yield from check_rs_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000) - yield from check_rs_case(dut, 0x82471f51, 0x243985f, 0x801c3790) - yield from check_rs_case(dut, 0x40000000, 0xc0000000, 0x00000000) - yield from check_rs_case(dut, 0x3F800000, 0x40000000, 0x40400000) - yield from check_rs_case(dut, 0x40000000, 0x3F800000, 0x40400000) - yield from check_rs_case(dut, 0x447A0000, 0x4488B000, 0x4502D800) - yield from check_rs_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E) - yield from check_rs_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E) - yield from check_rs_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6) - yield from check_rs_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6) - yield from check_rs_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6) - yield from check_rs_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6) - yield from check_rs_case(dut, 0x7F800000, 0x00000000, 0x7F800000) - yield from check_rs_case(dut, 0x00000000, 0x7F800000, 0x7F800000) - yield from check_rs_case(dut, 0xFF800000, 0x00000000, 0xFF800000) - yield from check_rs_case(dut, 0x00000000, 0xFF800000, 0xFF800000) - yield from check_rs_case(dut, 0x7F800000, 0x7F800000, 0x7F800000) - yield from check_rs_case(dut, 0xFF800000, 0xFF800000, 0xFF800000) - yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) - yield from check_rs_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7) - yield from check_rs_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E) - yield from check_rs_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 
0x3FFFFFFE) - yield from check_rs_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE) - yield from check_rs_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE) - yield from check_rs_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD) - yield from check_rs_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF) - yield from check_rs_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358) - yield from check_rs_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358) - yield from check_rs_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5) - yield from check_rs_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5) - #yield from check_rs_case(dut, 1, 0, 1) - #yield from check_rs_case(dut, 1, 1, 1) - - count = 0 - - #regression tests - stimulus_a = [0x80000000, 0x22cb525a, 0x40000000, 0x83e73d5c, - 0xbf9b1e94, 0x34082401, - 0x5e8ef81, 0x5c75da81, 0x2b017] - stimulus_b = [0xff800001, 0xadd79efa, 0xC0000000, 0x1c800000, - 0xc038ed3a, 0xb328cd45, - 0x114f3db, 0x2f642a39, 0xff3807ab] - yield from run_test(dut, stimulus_a, stimulus_b, add, get_rs_case) - count += len(stimulus_a) - print (count, "vectors passed") - - yield from run_corner_cases(dut, count, add, get_rs_case) - yield from run_edge_cases(dut, count, add, get_rs_case) - -if __name__ == '__main__': - dut = FPADD(width=32, id_wid=5, single_cycle=True) - run_simulation(dut, testbench(dut), vcd_name="test_add.vcd") - diff --git a/src/add/test_add16.py b/src/add/test_add16.py deleted file mode 100644 index f39ae8ae..00000000 --- a/src/add/test_add16.py +++ /dev/null @@ -1,44 +0,0 @@ -from operator import add - -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation - -from nmigen_add_experiment import FPADD - -from unit_test_half import (get_mantissa, get_exponent, get_sign, is_nan, - is_inf, is_pos_inf, is_neg_inf, - match, get_case, check_case, run_test, - run_edge_cases, run_corner_cases) - -def testbench(dut): - #yield from check_case(dut, 0x7800, 0xff6f, 0xff6f) - #yield from check_case(dut, 0x0000, 0x7c32, 0x7e32) - #yield from check_case(dut, 
0x0000, 0x7da9, 0x7fa9) - #yield from check_case(dut, 0x0000, 0x7ea0, 0x7ea0) - #yield from check_case(dut, 0x7c9a, 0x8000, 0x7e9a) - #yield from check_case(dut, 0x7d5e, 0x0000, 0x7f5e) - #yield from check_case(dut, 0x8000, 0x7c8c, 0x7e8c) - #yield from check_case(dut, 0x8000, 0xfc55, 0xfe55) - #yield from check_case(dut, 0x8000, 0x7e1a, 0x7e1a) - - #yield from check_case(dut, 0x8000, 0xfc01, 0x7e00) - yield from check_case(dut, 0xfc00, 0x7c00, 0x7e00) - yield from check_case(dut, 0x8000, 0, 0) - yield from check_case(dut, 0, 0, 0) - - count = 0 - - #regression tests - stimulus_a = [ 0x8000, 0x8000 ] - stimulus_b = [ 0x0000, 0xfc01 ] - yield from run_test(dut, stimulus_a, stimulus_b, add) - count += len(stimulus_a) - print (count, "vectors passed") - - yield from run_corner_cases(dut, count, add) - yield from run_edge_cases(dut, count, add) - -if __name__ == '__main__': - dut = FPADD(width=16, single_cycle=True) - run_simulation(dut, testbench(dut), vcd_name="test_add16.vcd") - diff --git a/src/add/test_add64.py b/src/add/test_add64.py deleted file mode 100644 index dcca12c6..00000000 --- a/src/add/test_add64.py +++ /dev/null @@ -1,45 +0,0 @@ -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation -from operator import add - -from nmigen_add_experiment import FPADD - -import sys -import atexit -from random import randint -from random import seed - -from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan, - is_inf, is_pos_inf, is_neg_inf, - match, get_case, check_case, run_test, - run_edge_cases, run_corner_cases) - - -def testbench(dut): - yield from check_case(dut, 0, 0, 0) - yield from check_case(dut, 0x3FF0000000000000, 0x4000000000000000, - 0x4008000000000000) - yield from check_case(dut, 0x4000000000000000, 0x3FF0000000000000, - 0x4008000000000000) - yield from check_case(dut, 0x4056C00000000000, 0x4042800000000000, - 0x4060000000000000) - yield from check_case(dut, 0x4056C00000000000, 0x4042EA3D70A3D70A, - 
0x40601A8F5C28F5C2) - - count = 0 - - #regression tests - stimulus_a = [0x3ff00000000000c5, 0xff80000000000000] - stimulus_b = [0xbd28a404211fb72b, 0x7f80000000000000] - yield from run_test(dut, stimulus_a, stimulus_b, add) - count += len(stimulus_a) - print (count, "vectors passed") - - yield from run_corner_cases(dut, count, add) - yield from run_edge_cases(dut, count, add) - - -if __name__ == '__main__': - dut = FPADD(width=64, single_cycle=False) - run_simulation(dut, testbench(dut), vcd_name="test_add64.vcd") - diff --git a/src/add/test_add_base.py b/src/add/test_add_base.py deleted file mode 100644 index 248f719a..00000000 --- a/src/add/test_add_base.py +++ /dev/null @@ -1,94 +0,0 @@ -from random import randint -from operator import add - -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation - -from nmigen_add_experiment import FPADDBase, FPADDBaseMod - -def get_case(dut, a, b, mid): - yield dut.in_mid.eq(mid) - yield dut.in_a.eq(a) - yield dut.in_b.eq(b) - yield dut.in_t.stb.eq(1) - yield - yield - yield - yield - ack = (yield dut.in_t.ack) - assert ack == 0 - - yield dut.in_t.stb.eq(0) - - yield dut.out_z.ack.eq(1) - - while True: - out_z_stb = (yield dut.out_z.stb) - if not out_z_stb: - yield - continue - out_z = yield dut.out_z.v - out_mid = yield dut.out_mid - yield dut.out_z.ack.eq(0) - yield - break - - return out_z, out_mid - -def check_case(dut, a, b, z, mid=None): - if mid is None: - mid = randint(0, 6) - out_z, out_mid = yield from get_case(dut, a, b, mid) - assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) - assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid) - - - -def testbench(dut): - yield from check_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1) - yield from check_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002) - yield from check_case(dut, 0x00000047, 0x80000048, 0x80000001) - yield from check_case(dut, 0x000116C2, 0x8001170A, 0x80000048) - yield from check_case(dut, 
0x7ed01f25, 0xff559e2c, 0xfedb1d33) - yield from check_case(dut, 0, 0, 0) - yield from check_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000) - yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) - #yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) - yield from check_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000) - yield from check_case(dut, 0x42540000, 0xC2540000, 0x00000000) - yield from check_case(dut, 0xC2540000, 0x42540000, 0x00000000) - yield from check_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000) - yield from check_case(dut, 0x82471f51, 0x243985f, 0x801c3790) - yield from check_case(dut, 0x40000000, 0xc0000000, 0x00000000) - yield from check_case(dut, 0x3F800000, 0x40000000, 0x40400000) - yield from check_case(dut, 0x40000000, 0x3F800000, 0x40400000) - yield from check_case(dut, 0x447A0000, 0x4488B000, 0x4502D800) - yield from check_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E) - yield from check_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E) - yield from check_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6) - yield from check_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6) - yield from check_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6) - yield from check_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6) - yield from check_case(dut, 0x7F800000, 0x00000000, 0x7F800000) - yield from check_case(dut, 0x00000000, 0x7F800000, 0x7F800000) - yield from check_case(dut, 0xFF800000, 0x00000000, 0xFF800000) - yield from check_case(dut, 0x00000000, 0xFF800000, 0xFF800000) - yield from check_case(dut, 0x7F800000, 0x7F800000, 0x7F800000) - yield from check_case(dut, 0xFF800000, 0xFF800000, 0xFF800000) - yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) - yield from check_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7) - yield from check_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E) - yield from check_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE) - yield from check_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE) - yield from check_case(dut, 0x7F7FFFEE, 
0xFEFFFFEE, 0x7EFFFFEE) - yield from check_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD) - yield from check_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF) - yield from check_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358) - yield from check_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358) - yield from check_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5) - yield from check_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5) - -if __name__ == '__main__': - dut = FPADDBaseMod(width=32, id_wid=5, single_cycle=True) - run_simulation(dut, testbench(dut), vcd_name="test_add.vcd") - diff --git a/src/add/test_buf_pipe.py b/src/add/test_buf_pipe.py deleted file mode 100644 index 37f2b31f..00000000 --- a/src/add/test_buf_pipe.py +++ /dev/null @@ -1,1308 +0,0 @@ -""" Unit tests for Buffered and Unbuffered pipelines - - contains useful worked examples of how to use the Pipeline API, - including: - - * Combinatorial Stage "Chaining" - * class-based data stages - * nmigen module-based data stages - * special nmigen module-based data stage, where the stage *is* the module - * Record-based data stages - * static-class data stages - * multi-stage pipelines (and how to connect them) - * how to *use* the pipelines (see Test5) - how to get data in and out - -""" - -from nmigen import Module, Signal, Mux, Const, Elaboratable -from nmigen.hdl.rec import Record -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil - -from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd -from example_buf_pipe import ExamplePipeline, UnbufferedPipeline -from example_buf_pipe import ExampleStageCls -from example_buf_pipe import PrevControl, NextControl, BufferedHandshake -from example_buf_pipe import StageChain, ControlBase, StageCls -from singlepipe import UnbufferedPipeline2 -from singlepipe import SimpleHandshake -from singlepipe import PassThroughHandshake -from singlepipe import PassThroughStage -from singlepipe import FIFOControl -from singlepipe import RecordObject - -from 
random import randint, seed - -#seed(4) - - -def check_o_n_valid(dut, val): - o_n_valid = yield dut.n.valid_o - assert o_n_valid == val - -def check_o_n_valid2(dut, val): - o_n_valid = yield dut.n.valid_o - assert o_n_valid == val - - -def tbench(dut): - #yield dut.i_p_rst.eq(1) - yield dut.n.ready_i.eq(0) - #yield dut.p.ready_o.eq(0) - yield - yield - #yield dut.i_p_rst.eq(0) - yield dut.n.ready_i.eq(1) - yield dut.p.data_i.eq(5) - yield dut.p.valid_i.eq(1) - yield - - yield dut.p.data_i.eq(7) - yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed - yield - yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt - - yield dut.p.data_i.eq(2) - yield - yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready) - yield dut.p.data_i.eq(9) - yield - yield dut.p.valid_i.eq(0) - yield dut.p.data_i.eq(12) - yield - yield dut.p.data_i.eq(32) - yield dut.n.ready_i.eq(1) - yield - yield from check_o_n_valid(dut, 1) # buffer still needs to output - yield - yield from check_o_n_valid(dut, 1) # buffer still needs to output - yield - yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done. 
- yield - - -def tbench2(dut): - #yield dut.p.i_rst.eq(1) - yield dut.n.ready_i.eq(0) - #yield dut.p.ready_o.eq(0) - yield - yield - #yield dut.p.i_rst.eq(0) - yield dut.n.ready_i.eq(1) - yield dut.p.data_i.eq(5) - yield dut.p.valid_i.eq(1) - yield - - yield dut.p.data_i.eq(7) - yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks - yield - yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks - - yield dut.p.data_i.eq(2) - yield - yield from check_o_n_valid2(dut, 1) # ok *now* i_p_valid effect is felt - yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready) - yield dut.p.data_i.eq(9) - yield - yield dut.p.valid_i.eq(0) - yield dut.p.data_i.eq(12) - yield - yield dut.p.data_i.eq(32) - yield dut.n.ready_i.eq(1) - yield - yield from check_o_n_valid2(dut, 1) # buffer still needs to output - yield - yield from check_o_n_valid2(dut, 1) # buffer still needs to output - yield - yield from check_o_n_valid2(dut, 1) # buffer still needs to output - yield - yield from check_o_n_valid2(dut, 0) # buffer outputted, *now* we're done. 
- yield - yield - yield - - -class Test3: - def __init__(self, dut, resultfn): - self.dut = dut - self.resultfn = resultfn - self.data = [] - for i in range(num_tests): - #data.append(randint(0, 1<<16-1)) - self.data.append(i+1) - self.i = 0 - self.o = 0 - - def send(self): - while self.o != len(self.data): - send_range = randint(0, 3) - for j in range(randint(1,10)): - if send_range == 0: - send = True - else: - send = randint(0, send_range) != 0 - o_p_ready = yield self.dut.p.ready_o - if not o_p_ready: - yield - continue - if send and self.i != len(self.data): - yield self.dut.p.valid_i.eq(1) - yield self.dut.p.data_i.eq(self.data[self.i]) - self.i += 1 - else: - yield self.dut.p.valid_i.eq(0) - yield - - def rcv(self): - while self.o != len(self.data): - stall_range = randint(0, 3) - for j in range(randint(1,10)): - stall = randint(0, stall_range) != 0 - yield self.dut.n.ready_i.eq(stall) - yield - o_n_valid = yield self.dut.n.valid_o - i_n_ready = yield self.dut.n.ready_i_test - if not o_n_valid or not i_n_ready: - continue - data_o = yield self.dut.n.data_o - self.resultfn(data_o, self.data[self.o], self.i, self.o) - self.o += 1 - if self.o == len(self.data): - break - -def resultfn_3(data_o, expected, i, o): - assert data_o == expected + 1, \ - "%d-%d data %x not match %x\n" \ - % (i, o, data_o, expected) - -def data_placeholder(): - data = [] - for i in range(num_tests): - d = PlaceHolder() - d.src1 = randint(0, 1<<16-1) - d.src2 = randint(0, 1<<16-1) - data.append(d) - return data - -def data_dict(): - data = [] - for i in range(num_tests): - data.append({'src1': randint(0, 1<<16-1), - 'src2': randint(0, 1<<16-1)}) - return data - - -class Test5: - def __init__(self, dut, resultfn, data=None, stage_ctl=False): - self.dut = dut - self.resultfn = resultfn - self.stage_ctl = stage_ctl - if data: - self.data = data - else: - self.data = [] - for i in range(num_tests): - self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1))) - self.i = 0 - self.o = 0 - - 
def send(self): - while self.o != len(self.data): - send_range = randint(0, 3) - for j in range(randint(1,10)): - if send_range == 0: - send = True - else: - send = randint(0, send_range) != 0 - #send = True - o_p_ready = yield self.dut.p.ready_o - if not o_p_ready: - yield - continue - if send and self.i != len(self.data): - yield self.dut.p.valid_i.eq(1) - for v in self.dut.set_input(self.data[self.i]): - yield v - self.i += 1 - else: - yield self.dut.p.valid_i.eq(0) - yield - - def rcv(self): - while self.o != len(self.data): - stall_range = randint(0, 3) - for j in range(randint(1,10)): - ready = randint(0, stall_range) != 0 - #ready = True - yield self.dut.n.ready_i.eq(ready) - yield - o_n_valid = yield self.dut.n.valid_o - i_n_ready = yield self.dut.n.ready_i_test - if not o_n_valid or not i_n_ready: - continue - if isinstance(self.dut.n.data_o, Record): - data_o = {} - dod = self.dut.n.data_o - for k, v in dod.fields.items(): - data_o[k] = yield v - else: - data_o = yield self.dut.n.data_o - self.resultfn(data_o, self.data[self.o], self.i, self.o) - self.o += 1 - if self.o == len(self.data): - break - -def resultfn_5(data_o, expected, i, o): - res = expected[0] + expected[1] - assert data_o == res, \ - "%d-%d data %x not match %s\n" \ - % (i, o, data_o, repr(expected)) - -def tbench4(dut): - data = [] - for i in range(num_tests): - #data.append(randint(0, 1<<16-1)) - data.append(i+1) - i = 0 - o = 0 - while True: - stall = randint(0, 3) != 0 - send = randint(0, 5) != 0 - yield dut.n.ready_i.eq(stall) - o_p_ready = yield dut.p.ready_o - if o_p_ready: - if send and i != len(data): - yield dut.p.valid_i.eq(1) - yield dut.p.data_i.eq(data[i]) - i += 1 - else: - yield dut.p.valid_i.eq(0) - yield - o_n_valid = yield dut.n.valid_o - i_n_ready = yield dut.n.ready_i_test - if o_n_valid and i_n_ready: - data_o = yield dut.n.data_o - assert data_o == data[o] + 2, "%d-%d data %x not match %x\n" \ - % (i, o, data_o, data[o]) - o += 1 - if o == len(data): - break - 
-###################################################################### -# Test 2 and 4 -###################################################################### - -class ExampleBufPipe2(ControlBase): - """ Example of how to do chained pipeline stages. - """ - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - pipe1 = ExampleBufPipe() - pipe2 = ExampleBufPipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - - -###################################################################### -# Test 9 -###################################################################### - -class ExampleBufPipeChain2(BufferedHandshake): - """ connects two stages together as a *single* combinatorial stage. - """ - def __init__(self): - stage1 = ExampleStageCls() - stage2 = ExampleStageCls() - combined = StageChain([stage1, stage2]) - BufferedHandshake.__init__(self, combined) - - -def data_chain2(): - data = [] - for i in range(num_tests): - data.append(randint(0, 1<<16-2)) - return data - - -def resultfn_9(data_o, expected, i, o): - res = expected + 2 - assert data_o == res, \ - "%d-%d received data %x not match expected %x\n" \ - % (i, o, data_o, res) - - -###################################################################### -# Test 6 and 10 -###################################################################### - -class SetLessThan(Elaboratable): - def __init__(self, width, signed): - self.m = Module() - self.src1 = Signal((width, signed), name="src1") - self.src2 = Signal((width, signed), name="src2") - self.output = Signal(width, name="out") - - def elaborate(self, platform): - self.m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0)) - return self.m - - -class LTStage(StageCls): - """ module-based stage example - """ - def __init__(self): - self.slt = SetLessThan(16, True) - - def ispec(self, name): - return (Signal(16, name="%s_sig1" % name), - Signal(16, name="%s_sig2" % name)) 
- - def ospec(self, name): - return Signal(16, "%s_out" % name) - - def setup(self, m, i): - self.o = Signal(16) - m.submodules.slt = self.slt - m.d.comb += self.slt.src1.eq(i[0]) - m.d.comb += self.slt.src2.eq(i[1]) - m.d.comb += self.o.eq(self.slt.output) - - def process(self, i): - return self.o - - -class LTStageDerived(SetLessThan, StageCls): - """ special version of a nmigen module where the module is also a stage - - shows that you don't actually need to combinatorially connect - to the outputs, or add the module as a submodule: just return - the module output parameter(s) from the Stage.process() function - """ - - def __init__(self): - SetLessThan.__init__(self, 16, True) - - def ispec(self): - return (Signal(16), Signal(16)) - - def ospec(self): - return Signal(16) - - def setup(self, m, i): - m.submodules.slt = self - m.d.comb += self.src1.eq(i[0]) - m.d.comb += self.src2.eq(i[1]) - - def process(self, i): - return self.output - - -class ExampleLTPipeline(UnbufferedPipeline): - """ an example of how to use the unbuffered pipeline. - """ - - def __init__(self): - stage = LTStage() - UnbufferedPipeline.__init__(self, stage) - - -class ExampleLTBufferedPipeDerived(BufferedHandshake): - """ an example of how to use the buffered pipeline. 
- """ - - def __init__(self): - stage = LTStageDerived() - BufferedHandshake.__init__(self, stage) - - -def resultfn_6(data_o, expected, i, o): - res = 1 if expected[0] < expected[1] else 0 - assert data_o == res, \ - "%d-%d data %x not match %s\n" \ - % (i, o, data_o, repr(expected)) - - -###################################################################### -# Test 7 -###################################################################### - -class ExampleAddRecordStage(StageCls): - """ example use of a Record - """ - - record_spec = [('src1', 16), ('src2', 16)] - def ispec(self): - """ returns a Record using the specification - """ - return Record(self.record_spec) - - def ospec(self): - return Record(self.record_spec) - - def process(self, i): - """ process the input data, returning a dictionary with key names - that exactly match the Record's attributes. - """ - return {'src1': i.src1 + 1, - 'src2': i.src2 + 1} - -###################################################################### -# Test 11 -###################################################################### - -class ExampleAddRecordPlaceHolderStage(StageCls): - """ example use of a Record, with a placeholder as the processing result - """ - - record_spec = [('src1', 16), ('src2', 16)] - def ispec(self): - """ returns a Record using the specification - """ - return Record(self.record_spec) - - def ospec(self): - return Record(self.record_spec) - - def process(self, i): - """ process the input data, returning a PlaceHolder class instance - with attributes that exactly match those of the Record. - """ - o = PlaceHolder() - o.src1 = i.src1 + 1 - o.src2 = i.src2 + 1 - return o - - -# a dummy class that may have stuff assigned to instances once created -class PlaceHolder: pass - - -class ExampleAddRecordPipe(UnbufferedPipeline): - """ an example of how to use the combinatorial pipeline. 
- """ - - def __init__(self): - stage = ExampleAddRecordStage() - UnbufferedPipeline.__init__(self, stage) - - -def resultfn_7(data_o, expected, i, o): - res = (expected['src1'] + 1, expected['src2'] + 1) - assert data_o['src1'] == res[0] and data_o['src2'] == res[1], \ - "%d-%d data %s not match %s\n" \ - % (i, o, repr(data_o), repr(expected)) - - -class ExampleAddRecordPlaceHolderPipe(UnbufferedPipeline): - """ an example of how to use the combinatorial pipeline. - """ - - def __init__(self): - stage = ExampleAddRecordPlaceHolderStage() - UnbufferedPipeline.__init__(self, stage) - - -def resultfn_test11(data_o, expected, i, o): - res1 = expected.src1 + 1 - res2 = expected.src2 + 1 - assert data_o['src1'] == res1 and data_o['src2'] == res2, \ - "%d-%d data %s not match %s\n" \ - % (i, o, repr(data_o), repr(expected)) - - -###################################################################### -# Test 8 -###################################################################### - - -class Example2OpClass: - """ an example of a class used to store 2 operands. - requires an eq function, to conform with the pipeline stage API - """ - - def __init__(self): - self.op1 = Signal(16) - self.op2 = Signal(16) - - def eq(self, i): - return [self.op1.eq(i.op1), self.op2.eq(i.op2)] - - -class ExampleAddClassStage(StageCls): - """ an example of how to use the buffered pipeline, as a class instance - """ - - def ispec(self): - """ returns an instance of an Example2OpClass. 
- """ - return Example2OpClass() - - def ospec(self): - """ returns an output signal which will happen to contain the sum - of the two inputs - """ - return Signal(16, name="add2_out") - - def process(self, i): - """ process the input data (sums the values in the tuple) and returns it - """ - return i.op1 + i.op2 - - -class ExampleBufPipeAddClass(BufferedHandshake): - """ an example of how to use the buffered pipeline, using a class instance - """ - - def __init__(self): - addstage = ExampleAddClassStage() - BufferedHandshake.__init__(self, addstage) - - -class TestInputAdd: - """ the eq function, called by set_input, needs an incoming object - that conforms to the Example2OpClass.eq function requirements - easiest way to do that is to create a class that has the exact - same member layout (self.op1, self.op2) as Example2OpClass - """ - def __init__(self, op1, op2): - self.op1 = op1 - self.op2 = op2 - - -def resultfn_8(data_o, expected, i, o): - res = expected.op1 + expected.op2 # these are a TestInputAdd instance - assert data_o == res, \ - "%d-%d data %s res %x not match %s\n" \ - % (i, o, repr(data_o), res, repr(expected)) - -def data_2op(): - data = [] - for i in range(num_tests): - data.append(TestInputAdd(randint(0, 1<<16-1), randint(0, 1<<16-1))) - return data - - -###################################################################### -# Test 12 -###################################################################### - -class ExampleStageDelayCls(StageCls, Elaboratable): - """ an example of how to use the buffered pipeline, in a static class - fashion - """ - - def __init__(self, valid_trigger=2): - self.count = Signal(2) - self.valid_trigger = valid_trigger - - def ispec(self): - return Signal(16, name="example_input_signal") - - def ospec(self): - return Signal(16, name="example_output_signal") - - @property - def d_ready(self): - """ data is ready to be accepted when this is true - """ - return (self.count == 1)# | (self.count == 3) - return Const(1) - - 
def d_valid(self, ready_i): - """ data is valid at output when this is true - """ - return self.count == self.valid_trigger - return Const(1) - - def process(self, i): - """ process the input data and returns it (adds 1) - """ - return i + 1 - - def elaborate(self, platform): - m = Module() - m.d.sync += self.count.eq(self.count + 1) - return m - - -class ExampleBufDelayedPipe(BufferedHandshake): - - def __init__(self): - stage = ExampleStageDelayCls(valid_trigger=2) - BufferedHandshake.__init__(self, stage, stage_ctl=True) - - def elaborate(self, platform): - m = BufferedHandshake.elaborate(self, platform) - m.submodules.stage = self.stage - return m - - -def data_chain1(): - data = [] - for i in range(num_tests): - data.append(1<<((i*3)%15)) - #data.append(randint(0, 1<<16-2)) - #print (hex(data[-1])) - return data - - -def resultfn_12(data_o, expected, i, o): - res = expected + 1 - assert data_o == res, \ - "%d-%d data %x not match %x\n" \ - % (i, o, data_o, res) - - -###################################################################### -# Test 13 -###################################################################### - -class ExampleUnBufDelayedPipe(BufferedHandshake): - - def __init__(self): - stage = ExampleStageDelayCls(valid_trigger=3) - BufferedHandshake.__init__(self, stage, stage_ctl=True) - - def elaborate(self, platform): - m = BufferedHandshake.elaborate(self, platform) - m.submodules.stage = self.stage - return m - -###################################################################### -# Test 15 -###################################################################### - -class ExampleBufModeAdd1Pipe(SimpleHandshake): - - def __init__(self): - stage = ExampleStageCls() - SimpleHandshake.__init__(self, stage) - - -###################################################################### -# Test 16 -###################################################################### - -class ExampleBufModeUnBufPipe(ControlBase): - - def elaborate(self, platform): - m = 
ControlBase.elaborate(self, platform) - - pipe1 = ExampleBufModeAdd1Pipe() - pipe2 = ExampleBufAdd1Pipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - -###################################################################### -# Test 17 -###################################################################### - -class ExampleUnBufAdd1Pipe2(UnbufferedPipeline2): - - def __init__(self): - stage = ExampleStageCls() - UnbufferedPipeline2.__init__(self, stage) - - -###################################################################### -# Test 18 -###################################################################### - -class PassThroughTest(PassThroughHandshake): - - def iospecfn(self): - return Signal(16, "out") - - def __init__(self): - stage = PassThroughStage(self.iospecfn) - PassThroughHandshake.__init__(self, stage) - -def resultfn_identical(data_o, expected, i, o): - res = expected - assert data_o == res, \ - "%d-%d data %x not match %x\n" \ - % (i, o, data_o, res) - - -###################################################################### -# Test 19 -###################################################################### - -class ExamplePassAdd1Pipe(PassThroughHandshake): - - def __init__(self): - stage = ExampleStageCls() - PassThroughHandshake.__init__(self, stage) - - -class ExampleBufPassThruPipe(ControlBase): - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - # XXX currently fails: any other permutation works fine. 
- # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok - # also fails using UnbufferedPipeline as well - pipe1 = ExampleBufModeAdd1Pipe() - pipe2 = ExamplePassAdd1Pipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - - -###################################################################### -# Test 20 -###################################################################### - -def iospecfn(): - return Signal(16, name="d_in") - -class FIFOTest16(FIFOControl): - - def __init__(self): - stage = PassThroughStage(iospecfn) - FIFOControl.__init__(self, 2, stage) - - -###################################################################### -# Test 21 -###################################################################### - -class ExampleFIFOPassThruPipe1(ControlBase): - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - pipe1 = FIFOTest16() - pipe2 = FIFOTest16() - pipe3 = ExamplePassAdd1Pipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - m.submodules.pipe3 = pipe3 - - m.d.comb += self.connect([pipe1, pipe2, pipe3]) - - return m - - -###################################################################### -# Test 22 -###################################################################### - -class Example2OpRecord(RecordObject): - def __init__(self): - RecordObject.__init__(self) - self.op1 = Signal(16) - self.op2 = Signal(16) - - -class ExampleAddRecordObjectStage(StageCls): - - def ispec(self): - """ returns an instance of an Example2OpRecord. 
- """ - return Example2OpRecord() - - def ospec(self): - """ returns an output signal which will happen to contain the sum - of the two inputs - """ - return Signal(16) - - def process(self, i): - """ process the input data (sums the values in the tuple) and returns it - """ - return i.op1 + i.op2 - - -class ExampleRecordHandshakeAddClass(SimpleHandshake): - - def __init__(self): - addstage = ExampleAddRecordObjectStage() - SimpleHandshake.__init__(self, stage=addstage) - - -###################################################################### -# Test 23 -###################################################################### - -def iospecfnrecord(): - return Example2OpRecord() - -class FIFOTestRecordControl(FIFOControl): - - def __init__(self): - stage = PassThroughStage(iospecfnrecord) - FIFOControl.__init__(self, 2, stage) - - -class ExampleFIFORecordObjectPipe(ControlBase): - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - pipe1 = FIFOTestRecordControl() - pipe2 = ExampleRecordHandshakeAddClass() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - - -###################################################################### -# Test 24 -###################################################################### - -class FIFOTestRecordAddStageControl(FIFOControl): - - def __init__(self): - stage = ExampleAddRecordObjectStage() - FIFOControl.__init__(self, 2, stage) - - - -###################################################################### -# Test 25 -###################################################################### - -class FIFOTestAdd16(FIFOControl): - - def __init__(self): - stage = ExampleStageCls() - FIFOControl.__init__(self, 2, stage) - - -class ExampleFIFOAdd2Pipe(ControlBase): - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - pipe1 = FIFOTestAdd16() - pipe2 = FIFOTestAdd16() - - m.submodules.pipe1 = pipe1 - 
m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - - -###################################################################### -# Test 26 -###################################################################### - -def iospecfn24(): - return (Signal(16, name="src1"), Signal(16, name="src2")) - -class FIFOTest2x16(FIFOControl): - - def __init__(self): - stage = PassThroughStage(iospecfn2) - FIFOControl.__init__(self, 2, stage) - - -###################################################################### -# Test 997 -###################################################################### - -class ExampleBufPassThruPipe2(ControlBase): - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - # XXX currently fails: any other permutation works fine. - # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok - # also fails using UnbufferedPipeline as well - #pipe1 = ExampleUnBufAdd1Pipe() - #pipe2 = ExampleBufAdd1Pipe() - pipe1 = ExampleBufAdd1Pipe() - pipe2 = ExamplePassAdd1Pipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - - -###################################################################### -# Test 998 -###################################################################### - -class ExampleBufPipe3(ControlBase): - """ Example of how to do delayed pipeline, where the stage signals - whether it is ready. 
- """ - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - pipe1 = ExampleBufDelayedPipe() - pipe2 = ExampleBufPipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - -###################################################################### -# Test 999 - XXX FAILS -# http://bugs.libre-riscv.org/show_bug.cgi?id=57 -###################################################################### - -class ExampleBufAdd1Pipe(BufferedHandshake): - - def __init__(self): - stage = ExampleStageCls() - BufferedHandshake.__init__(self, stage) - - -class ExampleUnBufAdd1Pipe(UnbufferedPipeline): - - def __init__(self): - stage = ExampleStageCls() - UnbufferedPipeline.__init__(self, stage) - - -class ExampleBufUnBufPipe(ControlBase): - - def elaborate(self, platform): - m = ControlBase.elaborate(self, platform) - - # XXX currently fails: any other permutation works fine. - # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok - # also fails using UnbufferedPipeline as well - #pipe1 = ExampleUnBufAdd1Pipe() - #pipe2 = ExampleBufAdd1Pipe() - pipe1 = ExampleBufAdd1Pipe() - pipe2 = ExampleUnBufAdd1Pipe() - - m.submodules.pipe1 = pipe1 - m.submodules.pipe2 = pipe2 - - m.d.comb += self.connect([pipe1, pipe2]) - - return m - - -###################################################################### -# Unit Tests -###################################################################### - -num_tests = 10 - -if __name__ == '__main__': - if False: - print ("test 1") - dut = ExampleBufPipe() - run_simulation(dut, tbench(dut), vcd_name="test_bufpipe.vcd") - - print ("test 2") - dut = ExampleBufPipe2() - run_simulation(dut, tbench2(dut), vcd_name="test_bufpipe2.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufpipe2.il", "w") as f: - f.write(vl) - - - print ("test 3") - dut = ExampleBufPipe() - 
test = Test3(dut, resultfn_3) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd") - - print ("test 3.5") - dut = ExamplePipeline() - test = Test3(dut, resultfn_3) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd") - - print ("test 4") - dut = ExampleBufPipe2() - run_simulation(dut, tbench4(dut), vcd_name="test_bufpipe4.vcd") - - print ("test 5") - dut = ExampleBufPipeAdd() - test = Test5(dut, resultfn_5, stage_ctl=True) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd") - - print ("test 6") - dut = ExampleLTPipeline() - test = Test5(dut, resultfn_6) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd") - - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - list(dut.p.data_i) + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_ltcomb_pipe.il", "w") as f: - f.write(vl) - - print ("test 7") - dut = ExampleAddRecordPipe() - data=data_dict() - test = Test5(dut, resultfn_7, data=data) - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o, - dut.p.data_i.src1, dut.p.data_i.src2, - dut.n.data_o.src1, dut.n.data_o.src2] - vl = rtlil.convert(dut, ports=ports) - with open("test_recordcomb_pipe.il", "w") as f: - f.write(vl) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd") - - print ("test 8") - dut = ExampleBufPipeAddClass() - data=data_2op() - test = Test5(dut, resultfn_8, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe8.vcd") - - print ("test 9") - dut = ExampleBufPipeChain2() - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufpipechain2.il", "w") as f: - f.write(vl) - - data = data_chain2() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, [test.send, test.rcv], - vcd_name="test_bufpipechain2.vcd") - - print ("test 10") - dut = 
ExampleLTBufferedPipeDerived() - test = Test5(dut, resultfn_6) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltbufpipe10.vcd") - vl = rtlil.convert(dut, ports=ports) - with open("test_ltbufpipe10.il", "w") as f: - f.write(vl) - - print ("test 11") - dut = ExampleAddRecordPlaceHolderPipe() - data=data_placeholder() - test = Test5(dut, resultfn_test11, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd") - - - print ("test 12") - dut = ExampleBufDelayedPipe() - data = data_chain1() - test = Test5(dut, resultfn_12, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe12.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufpipe12.il", "w") as f: - f.write(vl) - - print ("test 13") - dut = ExampleUnBufDelayedPipe() - data = data_chain1() - test = Test5(dut, resultfn_12, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe13.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_unbufpipe13.il", "w") as f: - f.write(vl) - - print ("test 15") - dut = ExampleBufModeAdd1Pipe() - data = data_chain1() - test = Test5(dut, resultfn_12, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf15.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufunbuf15.il", "w") as f: - f.write(vl) - - print ("test 16") - dut = ExampleBufModeUnBufPipe() - data = data_chain1() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf16.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = 
rtlil.convert(dut, ports=ports) - with open("test_bufunbuf16.il", "w") as f: - f.write(vl) - - print ("test 17") - dut = ExampleUnBufAdd1Pipe2() - data = data_chain1() - test = Test5(dut, resultfn_12, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe17.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_unbufpipe17.il", "w") as f: - f.write(vl) - - print ("test 18") - dut = PassThroughTest() - data = data_chain1() - test = Test5(dut, resultfn_identical, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_passthru18.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_passthru18.il", "w") as f: - f.write(vl) - - print ("test 19") - dut = ExampleBufPassThruPipe() - data = data_chain1() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpass19.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufpass19.il", "w") as f: - f.write(vl) - - print ("test 20") - dut = FIFOTest16() - data = data_chain1() - test = Test5(dut, resultfn_identical, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifo20.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_fifo20.il", "w") as f: - f.write(vl) - - print ("test 21") - dut = ExampleFIFOPassThruPipe1() - data = data_chain1() - test = Test5(dut, resultfn_12, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifopass21.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - 
[dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_fifopass21.il", "w") as f: - f.write(vl) - - print ("test 22") - dut = ExampleRecordHandshakeAddClass() - data=data_2op() - test = Test5(dut, resultfn_8, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord22.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i.op1, dut.p.data_i.op2] + \ - [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_addrecord22.il", "w") as f: - f.write(vl) - - print ("test 23") - dut = ExampleFIFORecordObjectPipe() - data=data_2op() - test = Test5(dut, resultfn_8, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord23.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i.op1, dut.p.data_i.op2] + \ - [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_addrecord23.il", "w") as f: - f.write(vl) - - print ("test 24") - dut = FIFOTestRecordAddStageControl() - data=data_2op() - test = Test5(dut, resultfn_8, data=data) - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i.op1, dut.p.data_i.op2] + \ - [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_addrecord24.il", "w") as f: - f.write(vl) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord24.vcd") - - print ("test 25") - dut = ExampleFIFOAdd2Pipe() - data = data_chain1() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_add2pipe25.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_add2pipe25.il", "w") as f: - f.write(vl) - - print ("test 997") - dut = ExampleBufPassThruPipe2() - data = data_chain1() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, 
[test.send, test.rcv], vcd_name="test_bufpass997.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufpass997.il", "w") as f: - f.write(vl) - - print ("test 998 (fails, bug)") - dut = ExampleBufPipe3() - data = data_chain1() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe14.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufpipe14.il", "w") as f: - f.write(vl) - - print ("test 999 (expected to fail, which is a bug)") - dut = ExampleBufUnBufPipe() - data = data_chain1() - test = Test5(dut, resultfn_9, data=data) - run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf999.vcd") - ports = [dut.p.valid_i, dut.n.ready_i, - dut.n.valid_o, dut.p.ready_o] + \ - [dut.p.data_i] + [dut.n.data_o] - vl = rtlil.convert(dut, ports=ports) - with open("test_bufunbuf999.il", "w") as f: - f.write(vl) - diff --git a/src/add/test_div.py b/src/add/test_div.py deleted file mode 100644 index 3f192338..00000000 --- a/src/add/test_div.py +++ /dev/null @@ -1,47 +0,0 @@ -import sys -from random import randint -from random import seed -from operator import truediv - -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation - -from nmigen_div_experiment import FPDIV - -from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan, - is_inf, is_pos_inf, is_neg_inf, - match, get_case, check_case, run_test, - run_edge_cases, run_corner_cases) - - -def testbench(dut): - yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000) - yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000) - yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000) - yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000) - yield from 
check_case(dut, 0x3F800000, 0x40000000, 0x3F000000) - yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB) - yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C) - yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2) - yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8) - yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC) - yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5) - yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2) - - count = 0 - - #regression tests - stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017] - stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab] - yield from run_test(dut, stimulus_a, stimulus_b, truediv, get_case) - count += len(stimulus_a) - print (count, "vectors passed") - - yield from run_corner_cases(dut, count, truediv, get_case) - yield from run_edge_cases(dut, count, truediv, get_case) - - -if __name__ == '__main__': - dut = FPDIV(width=32) - run_simulation(dut, testbench(dut), vcd_name="test_div.vcd") - diff --git a/src/add/test_div64.py b/src/add/test_div64.py deleted file mode 100644 index 5a9daf23..00000000 --- a/src/add/test_div64.py +++ /dev/null @@ -1,67 +0,0 @@ -from nmigen import Module, Signal -from nmigen.compat.sim import run_simulation - -from nmigen_div_experiment import FPDIV - -class ORGate: - def __init__(self): - self.a = Signal() - self.b = Signal() - self.x = Signal() - - def elaborate(self, platform=None): - - m = Module() - m.d.comb += self.x.eq(self.a | self.b) - - return m - -def check_case(dut, a, b, z): - yield dut.in_a.v.eq(a) - yield dut.in_a.stb.eq(1) - yield - yield - a_ack = (yield dut.in_a.ack) - assert a_ack == 0 - yield dut.in_b.v.eq(b) - yield dut.in_b.stb.eq(1) - b_ack = (yield dut.in_b.ack) - assert b_ack == 0 - - while True: - yield - out_z_stb = (yield dut.out_z.stb) - if not out_z_stb: - continue - yield dut.in_a.stb.eq(0) - yield dut.in_b.stb.eq(0) - yield dut.out_z.ack.eq(1) - 
yield - yield dut.out_z.ack.eq(0) - yield - yield - break - - out_z = yield dut.out_z.v - assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) - -def testbench(dut): - yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000, - 0x4008000000000000) - yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000, - 0x3FD5555555555555) - - if False: - yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000) - yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB) - yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C) - yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2) - yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8) - yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC) - yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5) - yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2) - -if __name__ == '__main__': - dut = FPDIV(width=64) - run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd") - diff --git a/src/add/test_dual.py b/src/add/test_dual.py deleted file mode 100644 index 15f5c762..00000000 --- a/src/add/test_dual.py +++ /dev/null @@ -1,60 +0,0 @@ -from sfpy import Float32 -from nmigen.compat.sim import run_simulation -from dual_add_experiment import ALU - - -def get_case(dut, a, b, c): - yield dut.a.v.eq(a) - yield dut.a.stb.eq(1) - yield - yield - a_ack = (yield dut.a.ack) - assert a_ack == 0 - - yield dut.a.stb.eq(0) - - yield dut.b.v.eq(b) - yield dut.b.stb.eq(1) - yield - yield - b_ack = (yield dut.b.ack) - assert b_ack == 0 - - yield dut.b.stb.eq(0) - - yield dut.c.v.eq(c) - yield dut.c.stb.eq(1) - yield - yield - c_ack = (yield dut.c.ack) - assert c_ack == 0 - - yield dut.c.stb.eq(0) - - yield dut.z.ack.eq(1) - - while True: - out_z_stb = (yield dut.z.stb) - if not out_z_stb: - yield - continue - - out_z = yield dut.z.v - - yield dut.z.ack.eq(0) - break - - return out_z - -def check_case(dut, a, b, c, z): - out_z = yield from 
get_case(dut, a, b, c) - assert out_z == z, "Output z 0x%x != 0x%x" % (out_z, z) - -def testbench(dut): - yield from check_case(dut, 0, 0, 0, 0) - yield from check_case(dut, 0x3F800000, 0x40000000, 0xc0000000, 0x3F800000) - -if __name__ == '__main__': - dut = ALU(width=32) - run_simulation(dut, testbench(dut), vcd_name="test_dual_add.vcd") - diff --git a/src/add/test_fpadd_pipe.py b/src/add/test_fpadd_pipe.py deleted file mode 100644 index df25e55f..00000000 --- a/src/add/test_fpadd_pipe.py +++ /dev/null @@ -1,126 +0,0 @@ -""" key strategic example showing how to do multi-input fan-in into a - multi-stage pipeline, then multi-output fanout. - - the multiplex ID from the fan-in is passed in to the pipeline, preserved, - and used as a routing ID on the fanout. -""" - -from random import randint -from math import log -from nmigen import Module, Signal, Cat, Value -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil - -from nmigen_add_experiment import (FPADDMuxInOut,) - -from sfpy import Float32 - -class InputTest: - def __init__(self, dut): - self.dut = dut - self.di = {} - self.do = {} - self.tlen = 10 - self.width = 32 - for mid in range(dut.num_rows): - self.di[mid] = {} - self.do[mid] = [] - for i in range(self.tlen): - op1 = randint(0, (1<> (i+1)) << 1) | (m & 1) - for l in range(i): - if m & (1<<(l+1)): - calc_m |= 1 - - assert out_e == calc_e, "Output e 0x%x != expected 0x%x" % (out_e, calc_e) - assert out_m == calc_m, "Output m 0x%x != expected 0x%x" % (out_m, calc_m) - -def testbench(dut): - m_width = dut.a.m_width - e_width = dut.a.e_width - e_max = dut.a.e_max - for j in range(200): - m = randint(0, (1<> b) & ((1<> b) & ((1<> 52) - 1023 - -def get_sign(x): - return ((x & 0x8000000000000000) >> 63) - -def is_nan(x): - return get_exponent(x) == 1024 and get_mantissa(x) != 0 - -def is_inf(x): - return get_exponent(x) == 1024 and get_mantissa(x) == 0 - -def is_pos_inf(x): - return is_inf(x) and not get_sign(x) - -def 
is_neg_inf(x): - return is_inf(x) and get_sign(x) - -def match(x, y): - return ( - (is_pos_inf(x) and is_pos_inf(y)) or - (is_neg_inf(x) and is_neg_inf(y)) or - (is_nan(x) and is_nan(y)) or - (x == y) - ) - -def get_case(dut, a, b): - yield dut.in_a.v.eq(a) - yield dut.in_a.stb.eq(1) - yield - yield - a_ack = (yield dut.in_a.ack) - assert a_ack == 0 - yield dut.in_b.v.eq(b) - yield dut.in_b.stb.eq(1) - b_ack = (yield dut.in_b.ack) - assert b_ack == 0 - - while True: - yield - out_z_stb = (yield dut.out_z.stb) - if not out_z_stb: - continue - yield dut.in_a.stb.eq(0) - yield dut.in_b.stb.eq(0) - yield dut.out_z.ack.eq(1) - yield - yield dut.out_z.ack.eq(0) - yield - yield - break - - out_z = yield dut.out_z.v - return out_z - -def check_case(dut, a, b, z): - out_z = yield from get_case(dut, a, b) - assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) - - -def run_test(dut, stimulus_a, stimulus_b, op): - - expected_responses = [] - actual_responses = [] - for a, b in zip(stimulus_a, stimulus_b): - af = Float64.from_bits(a) - bf = Float64.from_bits(b) - z = op(af, bf) - expected_responses.append(z.get_bits()) - #print (af, bf, z) - actual = yield from get_case(dut, a, b) - actual_responses.append(actual) - - if len(actual_responses) < len(expected_responses): - print ("Fail ... not enough results") - exit(0) - - for exp, act, a, b in zip(expected_responses, actual_responses, - stimulus_a, stimulus_b): - passed = match(exp, act) - - if not passed: - - print ("Fail ... 
expected:", hex(exp), "actual:", hex(act)) - - print (hex(a)) - print ("a mantissa:", a & 0x000fffffffffffff) - print ("a exponent:", ((a & 0x7ff0000000000000) >> 52)\ - - 1023) - print ("a sign:", ((a & 0x8000000000000000) >> 63)) - - print (hex(b)) - print ("b mantissa:", b & 0x000fffffffffffff) - print ("b exponent:", ((b & 0x7ff0000000000000) >> 52)\ - - 1023) - print ("b sign:", ((b & 0x8000000000000000) >> 63)) - - print (hex(exp)) - print ("expected mantissa:", exp & 0x000fffffffffffff) - print ("expected exponent:", ((exp & 0x7ff0000000000000) >> 52)\ - - 1023) - print ("expected sign:", ((exp & 0x8000000000000000) >> 63)) - - print (hex(act)) - print ("actual mantissa:", act & 0x000fffffffffffff) - print ("actual exponent:", ((act & 0x7ff0000000000000) >> 52)\ - - 1023) - print ("actual sign:", ((act & 0x8000000000000000) >> 63)) - - sys.exit(0) - - -def run_corner_cases(dut, count, op): - #corner cases - from itertools import permutations - stimulus_a = [i[0] for i in permutations([ - 0x8000000000000000, - 0x0000000000000000, - 0x7ff8000000000000, - 0xfff8000000000000, - 0x7ff0000000000000, - 0xfff0000000000000 - ], 2)] - stimulus_b = [i[1] for i in permutations([ - 0x8000000000000000, - 0x0000000000000000, - 0x7ff8000000000000, - 0xfff8000000000000, - 0x7ff0000000000000, - 0xfff0000000000000 - ], 2)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - -def run_edge_cases(dut, count, op): - #edge cases - stimulus_a = [0x8000000000000000 for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0x0000000000000000 for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = 
[0x8000000000000000 for i in range(1000)] - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x0000000000000000 for i in range(1000)] - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0x7FF8000000000000 for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0xFFF8000000000000 for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x7FF8000000000000 for i in range(1000)] - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0xFFF8000000000000 for i in range(1000)] - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0x7FF0000000000000 for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0xFFF0000000000000 for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x7FF0000000000000 for i in range(1000)] - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, 
op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0xFFF0000000000000 for i in range(1000)] - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - #seed(0) - for i in range(100000): - stimulus_a = [randint(0, 1<<64) for i in range(1000)] - stimulus_b = [randint(0, 1<<64) for i in range(1000)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += 1000 - print (count, "random vectors passed") - diff --git a/src/add/unit_test_half.py b/src/add/unit_test_half.py deleted file mode 100644 index 73c9b653..00000000 --- a/src/add/unit_test_half.py +++ /dev/null @@ -1,211 +0,0 @@ -from random import randint -from random import seed - -import sys -from sfpy import Float16 - -def get_mantissa(x): - return 0x3ff & x - -def get_exponent(x): - return ((x & 0xf800) >> 11) - 15 - -def get_sign(x): - return ((x & 0x8000) >> 15) - -def is_nan(x): - return get_exponent(x) == 16 and get_mantissa(x) != 0 - -def is_inf(x): - return get_exponent(x) == 16 and get_mantissa(x) == 0 - -def is_pos_inf(x): - return is_inf(x) and not get_sign(x) - -def is_neg_inf(x): - return is_inf(x) and get_sign(x) - -def match(x, y): - return ( - (is_pos_inf(x) and is_pos_inf(y)) or - (is_neg_inf(x) and is_neg_inf(y)) or - (is_nan(x) and is_nan(y)) or - (x == y) - ) - -def get_case(dut, a, b): - yield dut.in_a.v.eq(a) - yield dut.in_a.stb.eq(1) - yield - yield - a_ack = (yield dut.in_a.ack) - assert a_ack == 0 - yield dut.in_b.v.eq(b) - yield dut.in_b.stb.eq(1) - b_ack = (yield dut.in_b.ack) - assert b_ack == 0 - - while True: - yield - out_z_stb = (yield dut.out_z.stb) - if not out_z_stb: - continue - yield dut.in_a.stb.eq(0) - yield dut.in_b.stb.eq(0) - yield dut.out_z.ack.eq(1) - yield - yield dut.out_z.ack.eq(0) - yield - yield - break - - out_z = yield dut.out_z.v - return out_z - -def check_case(dut, a, b, z): - out_z = yield from 
get_case(dut, a, b) - assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) - - -def run_test(dut, stimulus_a, stimulus_b, op): - - expected_responses = [] - actual_responses = [] - for a, b in zip(stimulus_a, stimulus_b): - af = Float16.from_bits(a) - bf = Float16.from_bits(b) - z = op(af, bf) - expected_responses.append(z.get_bits()) - #print (af, bf, z) - actual = yield from get_case(dut, a, b) - actual_responses.append(actual) - - if len(actual_responses) < len(expected_responses): - print ("Fail ... not enough results") - exit(0) - - for expected, actual, a, b in zip(expected_responses, actual_responses, - stimulus_a, stimulus_b): - passed = match(expected, actual) - - if not passed: - - print ("Fail ... expected:", hex(expected), "actual:", hex(actual)) - - print (hex(a)) - print ("a mantissa:", get_mantissa(a)) - print ("a exponent:", get_exponent(a)) - print ("a sign:", get_sign(a)) - - print (hex(b)) - print ("b mantissa:", get_mantissa(b)) - print ("b exponent:", get_exponent(b)) - print ("b sign:", get_sign(b)) - - print (hex(expected)) - print ("expected mantissa:", get_mantissa(expected)) - print ("expected exponent:", get_exponent(expected)) - print ("expected sign:", get_sign(expected)) - - print (hex(actual)) - print ("actual mantissa:", get_mantissa(actual)) - print ("actual exponent:", get_exponent(actual)) - print ("actual sign:", get_sign(actual)) - - sys.exit(0) - -def run_corner_cases(dut, count, op): - #corner cases - corners = [0x8000, 0x0000, 0x7800, 0xf800, 0x7c00, 0xfc00] - from itertools import permutations - stimulus_a = [i[0] for i in permutations(corners, 2)] - stimulus_b = [i[1] for i in permutations(corners, 2)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - -def run_edge_cases(dut, count, op): - maxint16 = 1<<16 - maxcount = 10 - #edge cases - stimulus_a = [0x8000 for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in 
range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0x0000 for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x8000 for i in range(maxcount)] - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x0000 for i in range(maxcount)] - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0x7800 for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0xF800 for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x7800 for i in range(maxcount)] - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0xF800 for i in range(maxcount)] - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_a = [0x7C00 for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - 
stimulus_a = [0xFC00 for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0x7C00 for i in range(maxcount)] - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - stimulus_b = [0xFC00 for i in range(maxcount)] - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += len(stimulus_a) - print (count, "vectors passed") - - #seed(0) - for i in range(100000): - stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] - stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] - yield from run_test(dut, stimulus_a, stimulus_b, op) - count += maxcount - print (count, "random vectors passed") - diff --git a/src/add/unit_test_single.py b/src/add/unit_test_single.py deleted file mode 100644 index 2b0d9e56..00000000 --- a/src/add/unit_test_single.py +++ /dev/null @@ -1,255 +0,0 @@ -from random import randint -from random import seed - -import sys -from sfpy import Float32 - -def get_mantissa(x): - return 0x7fffff & x - -def get_exponent(x): - return ((x & 0x7f800000) >> 23) - 127 - -def set_exponent(x, e): - return (x & ~0x7f800000) | ((e+127) << 23) - -def get_sign(x): - return ((x & 0x80000000) >> 31) - -def is_nan(x): - return get_exponent(x) == 128 and get_mantissa(x) != 0 - -def is_inf(x): - return get_exponent(x) == 128 and get_mantissa(x) == 0 - -def is_pos_inf(x): - return is_inf(x) and not get_sign(x) - -def is_neg_inf(x): - return is_inf(x) and get_sign(x) - -def match(x, y): - return ( - (is_pos_inf(x) and is_pos_inf(y)) or - (is_neg_inf(x) and is_neg_inf(y)) or - (is_nan(x) and is_nan(y)) or - (x == y) - ) - -def get_rs_case(dut, a, b, mid): - in_a, in_b = dut.rs[0] - out_z = dut.res[0] 
- yield dut.ids.in_mid.eq(mid) - yield in_a.v.eq(a) - yield in_a.valid_i.eq(1) - yield - yield - yield - yield - a_ack = (yield in_a.ready_o) - assert a_ack == 0 - - yield in_a.valid_i.eq(0) - - yield in_b.v.eq(b) - yield in_b.valid_i.eq(1) - yield - yield - b_ack = (yield in_b.ready_o) - assert b_ack == 0 - - yield in_b.valid_i.eq(0) - - yield out_z.ready_i.eq(1) - - while True: - out_z_stb = (yield out_z.valid_o) - if not out_z_stb: - yield - continue - vout_z = yield out_z.v - #out_mid = yield dut.ids.out_mid - yield out_z.ready_i.eq(0) - yield - break - - return vout_z, mid - -def check_rs_case(dut, a, b, z, mid=None): - if mid is None: - mid = randint(0, 6) - mid = 0 - out_z, out_mid = yield from get_rs_case(dut, a, b, mid) - assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) - assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid) - - -def get_case(dut, a, b, mid): - #yield dut.in_mid.eq(mid) - yield dut.in_a.v.eq(a) - yield dut.in_a.valid_i_test.eq(1) - yield - yield - yield - yield - a_ack = (yield dut.in_a.ready_o) - assert a_ack == 0 - - yield dut.in_a.valid_i.eq(0) - - yield dut.in_b.v.eq(b) - yield dut.in_b.valid_i.eq(1) - yield - yield - b_ack = (yield dut.in_b.ready_o) - assert b_ack == 0 - - yield dut.in_b.valid_i.eq(0) - - yield dut.out_z.ready_i.eq(1) - - while True: - out_z_stb = (yield dut.out_z.valid_o) - if not out_z_stb: - yield - continue - out_z = yield dut.out_z.v - #out_mid = yield dut.out_mid - yield dut.out_z.ready_i.eq(0) - yield - break - - return out_z, mid # TODO: mid - -def check_case(dut, a, b, z, mid=None): - if mid is None: - mid = randint(0, 6) - mid = 0 - out_z, out_mid = yield from get_case(dut, a, b, mid) - assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) - assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid) - - -def run_test(dut, stimulus_a, stimulus_b, op, get_case_fn): - - expected_responses = [] - actual_responses = [] - for a, b 
in zip(stimulus_a, stimulus_b): - mid = randint(0, 6) - mid = 0 - af = Float32.from_bits(a) - bf = Float32.from_bits(b) - z = op(af, bf) - expected_responses.append((z.get_bits(), mid)) - actual = yield from get_case_fn(dut, a, b, mid) - actual_responses.append(actual) - - if len(actual_responses) < len(expected_responses): - print ("Fail ... not enough results") - exit(0) - - for expected, actual, a, b in zip(expected_responses, actual_responses, - stimulus_a, stimulus_b): - passed = match(expected[0], actual[0]) - if expected[1] != actual[1]: # check mid - print ("MID failed", expected[1], actual[1]) - sys.exit(0) - - if not passed: - - expected = expected[0] - actual = actual[0] - print ("Fail ... expected:", hex(expected), "actual:", hex(actual)) - - print (hex(a)) - print ("a mantissa:", a & 0x7fffff) - print ("a exponent:", ((a & 0x7f800000) >> 23) - 127) - print ("a sign:", ((a & 0x80000000) >> 31)) - - print (hex(b)) - print ("b mantissa:", b & 0x7fffff) - print ("b exponent:", ((b & 0x7f800000) >> 23) - 127) - print ("b sign:", ((b & 0x80000000) >> 31)) - - print (hex(expected)) - print ("expected mantissa:", expected & 0x7fffff) - print ("expected exponent:", ((expected & 0x7f800000) >> 23) - 127) - print ("expected sign:", ((expected & 0x80000000) >> 31)) - - print (hex(actual)) - print ("actual mantissa:", actual & 0x7fffff) - print ("actual exponent:", ((actual & 0x7f800000) >> 23) - 127) - print ("actual sign:", ((actual & 0x80000000) >> 31)) - - sys.exit(0) - -corner_cases = [0x80000000, 0x00000000, 0x7f800000, 0xff800000, - 0x7fc00000, 0xffc00000] - -def run_corner_cases(dut, count, op, get_case_fn): - #corner cases - from itertools import permutations - stimulus_a = [i[0] for i in permutations(corner_cases, 2)] - stimulus_b = [i[1] for i in permutations(corner_cases, 2)] - yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn) - count += len(stimulus_a) - print (count, "vectors passed") - -def run_test_2(dut, stimulus_a, stimulus_b, op, 
get_case_fn): - yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn) - yield from run_test(dut, stimulus_b, stimulus_a, op, get_case_fn) - -def run_cases(dut, count, op, fixed_num, num_entries, get_case_fn): - if isinstance(fixed_num, int): - stimulus_a = [fixed_num for i in range(num_entries)] - report = hex(fixed_num) - else: - stimulus_a = fixed_num - report = "random" - - stimulus_b = [randint(0, 1<<32) for i in range(num_entries)] - yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) - count += len(stimulus_a) - print (count, "vectors passed 2^32", report) - - # non-canonical NaNs. - stimulus_b = [set_exponent(randint(0, 1<<32), 128) \ - for i in range(num_entries)] - yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) - count += len(stimulus_a) - print (count, "vectors passed Non-Canonical NaN", report) - - # -127 - stimulus_b = [set_exponent(randint(0, 1<<32), -127) \ - for i in range(num_entries)] - yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) - count += len(stimulus_a) - print (count, "vectors passed exp=-127", report) - - # nearly zero - stimulus_b = [set_exponent(randint(0, 1<<32), -126) \ - for i in range(num_entries)] - yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) - count += len(stimulus_a) - print (count, "vectors passed exp=-126", report) - - # nearly inf - stimulus_b = [set_exponent(randint(0, 1<<32), 127) \ - for i in range(num_entries)] - yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) - count += len(stimulus_a) - print (count, "vectors passed exp=127", report) - - return count - -def run_edge_cases(dut, count, op, get_case_fn): - #edge cases - for testme in corner_cases: - count = yield from run_cases(dut, count, op, testme, 10, get_case_fn) - - for i in range(100000): - stimulus_a = [randint(0, 1<<32) for i in range(10)] - count = yield from run_cases(dut, count, op, stimulus_a, 10, - get_case_fn) - return count - diff --git 
a/src/ieee754/add/concurrentunit.py b/src/ieee754/add/concurrentunit.py new file mode 100644 index 00000000..c0053c8b --- /dev/null +++ b/src/ieee754/add/concurrentunit.py @@ -0,0 +1,74 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from math import log +from nmigen import Module +from nmigen.cli import main, verilog + +from singlepipe import PassThroughStage +from multipipe import CombMuxOutPipe +from multipipe import PriorityCombMuxInPipe + +from fpcommon.getop import FPADDBaseData +from fpcommon.denorm import FPSCData +from fpcommon.pack import FPPackData +from fpcommon.normtopack import FPNormToPack +from fpadd.specialcases import FPAddSpecialCasesDeNorm +from fpadd.addstages import FPAddAlignSingleAdd + + +def num_bits(n): + return int(log(n) / log(2)) + +class FPADDInMuxPipe(PriorityCombMuxInPipe): + def __init__(self, num_rows, iospecfn): + self.num_rows = num_rows + stage = PassThroughStage(iospecfn) + PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows) + + +class FPADDMuxOutPipe(CombMuxOutPipe): + def __init__(self, num_rows, iospecfn): + self.num_rows = num_rows + stage = PassThroughStage(iospecfn) + CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows) + + +class ReservationStations: + """ Reservation-Station pipeline + + Input: num_rows - number of input and output Reservation Stations + + Requires: the addition of an "alu" object, an i_specfn and an o_specfn + + * fan-in on inputs (an array of FPADDBaseData: a,b,mid) + * ALU pipeline + * fan-out on outputs (an array of FPPackData: z,mid) + + Fan-in and Fan-out are combinatorial. 
+ """ + def __init__(self, num_rows): + self.num_rows = num_rows + self.inpipe = FPADDInMuxPipe(num_rows, self.i_specfn) # fan-in + self.outpipe = FPADDMuxOutPipe(num_rows, self.o_specfn) # fan-out + + self.p = self.inpipe.p # kinda annoying, + self.n = self.outpipe.n # use pipe in/out as this class in/out + self._ports = self.inpipe.ports() + self.outpipe.ports() + + def elaborate(self, platform): + m = Module() + m.submodules.inpipe = self.inpipe + m.submodules.alu = self.alu + m.submodules.outpipe = self.outpipe + + m.d.comb += self.inpipe.n.connect_to_next(self.alu.p) + m.d.comb += self.alu.connect_to_next(self.outpipe) + + return m + + def ports(self): + return self._ports + + diff --git a/src/ieee754/add/dual_add_experiment.py b/src/ieee754/add/dual_add_experiment.py new file mode 100644 index 00000000..7ec479f5 --- /dev/null +++ b/src/ieee754/add/dual_add_experiment.py @@ -0,0 +1,72 @@ +from nmigen import * +from nmigen.cli import main + +from nmigen_add_experiment import FPADD +from fpbase import FPOp + + +class Adder: + def __init__(self, width): + self.a = Signal(width) + self.b = Signal(width) + self.o = Signal(width) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.o.eq(self.a + self.b) + return m + + +class Subtractor: + def __init__(self, width): + self.a = Signal(width) + self.b = Signal(width) + self.o = Signal(width) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.o.eq(self.a - self.b) + return m + + +class ALU: + def __init__(self, width): + #self.op = Signal() + self.a = FPOp(width) + self.b = FPOp(width) + self.c = FPOp(width) + self.z = FPOp(width) + self.int_stb = Signal() + + self.add1 = FPADD(width) + self.add2 = FPADD(width) + + def elaborate(self, platform): + m = Module() + m.submodules.add1 = self.add1 + m.submodules.add2 = self.add2 + # join add1 a to a: add1.in_a = a + m.d.comb += self.add1.in_a.chain_from(self.a) + # join add1 b to b: add1.in_b = b + m.d.comb += 
self.add1.in_b.chain_from(self.b) + # join add2 a to c: add2.in_a = c + m.d.comb += self.add2.in_a.chain_from(self.c) + # join add2 b to add1 z: add2.in_b = add1.out_z + m.d.comb += self.add2.in_b.chain_inv(self.add1.out_z) + # join output from add2 to z: z = add2.out_z + m.d.comb += self.z.chain_from(self.add2.out_z) + # get at add1's stb signal + m.d.comb += self.int_stb.eq(self.add1.out_z.stb) + #with m.If(self.op): + # m.d.comb += self.o.eq(self.sub.o) + #with m.Else(): + # m.d.comb += self.o.eq(self.add.o) + return m + + +if __name__ == "__main__": + alu = ALU(width=16) + main(alu, ports=alu.a.ports() + \ + alu.b.ports() + \ + alu.c.ports() + \ + alu.z.ports()) diff --git a/src/ieee754/add/example_buf_pipe.py b/src/ieee754/add/example_buf_pipe.py new file mode 100644 index 00000000..4bb7cdf1 --- /dev/null +++ b/src/ieee754/add/example_buf_pipe.py @@ -0,0 +1,103 @@ +""" Pipeline and BufferedHandshake examples +""" + +from nmoperator import eq +from iocontrol import (PrevControl, NextControl) +from singlepipe import (PrevControl, NextControl, ControlBase, + StageCls, Stage, StageChain, + BufferedHandshake, UnbufferedPipeline) + +from nmigen import Signal, Module +from nmigen.cli import verilog, rtlil + + +class ExampleAddStage(StageCls): + """ an example of how to use the buffered pipeline, as a class instance + """ + + def ispec(self): + """ returns a tuple of input signals which will be the incoming data + """ + return (Signal(16), Signal(16)) + + def ospec(self): + """ returns an output signal which will happen to contain the sum + of the two inputs + """ + return Signal(16) + + def process(self, i): + """ process the input data (sums the values in the tuple) and returns it + """ + return i[0] + i[1] + + +class ExampleBufPipeAdd(BufferedHandshake): + """ an example of how to use the buffered pipeline, using a class instance + """ + + def __init__(self): + addstage = ExampleAddStage() + BufferedHandshake.__init__(self, addstage) + + +class ExampleStage(Stage): 
+ """ an example of how to use the buffered pipeline, in a static class + fashion + """ + + def ispec(): + return Signal(16, name="example_input_signal") + + def ospec(): + return Signal(16, name="example_output_signal") + + def process(i): + """ process the input data and returns it (adds 1) + """ + return i + 1 + + +class ExampleStageCls(StageCls): + """ an example of how to use the buffered pipeline, in a static class + fashion + """ + + def ispec(self): + return Signal(16, name="example_input_signal") + + def ospec(self): + return Signal(16, name="example_output_signal") + + def process(self, i): + """ process the input data and returns it (adds 1) + """ + return i + 1 + + +class ExampleBufPipe(BufferedHandshake): + """ an example of how to use the buffered pipeline. + """ + + def __init__(self): + BufferedHandshake.__init__(self, ExampleStage) + + +class ExamplePipeline(UnbufferedPipeline): + """ an example of how to use the unbuffered pipeline. + """ + + def __init__(self): + UnbufferedPipeline.__init__(self, ExampleStage) + + +if __name__ == '__main__': + dut = ExampleBufPipe() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_bufpipe.il", "w") as f: + f.write(vl) + + dut = ExamplePipeline() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_combpipe.il", "w") as f: + f.write(vl) diff --git a/src/ieee754/add/fadd_state.py b/src/ieee754/add/fadd_state.py new file mode 100644 index 00000000..7ad88786 --- /dev/null +++ b/src/ieee754/add/fadd_state.py @@ -0,0 +1,282 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Cat +from nmigen.cli import main, verilog + +from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase + +from singlepipe import eq + + +class FPADD(FPBase): + + def __init__(self, width, single_cycle=False): + FPBase.__init__(self) + self.width = width + self.single_cycle = single_cycle + + self.in_a = FPOp(width) + self.in_b = 
FPOp(width) + self.out_z = FPOp(width) + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for FPAdd + """ + m = Module() + + # Latches + a = FPNumIn(self.in_a, self.width) + b = FPNumIn(self.in_b, self.width) + z = FPNumOut(self.width, False) + + m.submodules.fpnum_a = a + m.submodules.fpnum_b = b + m.submodules.fpnum_z = z + + m.d.comb += a.v.eq(self.in_a.v) + m.d.comb += b.v.eq(self.in_b.v) + + w = z.m_width + 4 + tot = Signal(w, reset_less=True) # sticky/round/guard, {mantissa} result, 1 overflow + + of = Overflow() + + m.submodules.overflow = of + + with m.FSM() as fsm: + + # ****** + # gets operand a + + with m.State("get_a"): + res = self.get_op(m, self.in_a, a, "get_b") + m.d.sync += eq([a, self.in_a.ack], res) + + # ****** + # gets operand b + + with m.State("get_b"): + res = self.get_op(m, self.in_b, b, "special_cases") + m.d.sync += eq([b, self.in_b.ack], res) + + # ****** + # special cases: NaNs, infs, zeros, denormalised + # NOTE: some of these are unique to add. 
see "Special Operations" + # https://steve.hollasch.net/cgindex/coding/ieeefloat.html + + with m.State("special_cases"): + + s_nomatch = Signal() + m.d.comb += s_nomatch.eq(a.s != b.s) + + m_match = Signal() + m.d.comb += m_match.eq(a.m == b.m) + + # if a is NaN or b is NaN return NaN + with m.If(a.is_nan | b.is_nan): + m.next = "put_z" + m.d.sync += z.nan(1) + + # XXX WEIRDNESS for FP16 non-canonical NaN handling + # under review + + ## if a is zero and b is NaN return -b + #with m.If(a.is_zero & (a.s==0) & b.is_nan): + # m.next = "put_z" + # m.d.sync += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0])) + + ## if b is zero and a is NaN return -a + #with m.Elif(b.is_zero & (b.s==0) & a.is_nan): + # m.next = "put_z" + # m.d.sync += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0])) + + ## if a is -zero and b is NaN return -b + #with m.Elif(a.is_zero & (a.s==1) & b.is_nan): + # m.next = "put_z" + # m.d.sync += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1)) + + ## if b is -zero and a is NaN return -a + #with m.Elif(b.is_zero & (b.s==1) & a.is_nan): + # m.next = "put_z" + # m.d.sync += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1)) + + # if a is inf return inf (or NaN) + with m.Elif(a.is_inf): + m.next = "put_z" + m.d.sync += z.inf(a.s) + # if a is inf and signs don't match return NaN + with m.If(b.exp_128 & s_nomatch): + m.d.sync += z.nan(1) + + # if b is inf return inf + with m.Elif(b.is_inf): + m.next = "put_z" + m.d.sync += z.inf(b.s) + + # if a is zero and b zero return signed-a/b + with m.Elif(a.is_zero & b.is_zero): + m.next = "put_z" + m.d.sync += z.create(a.s & b.s, b.e, b.m[3:-1]) + + # if a is zero return b + with m.Elif(a.is_zero): + m.next = "put_z" + m.d.sync += z.create(b.s, b.e, b.m[3:-1]) + + # if b is zero return a + with m.Elif(b.is_zero): + m.next = "put_z" + m.d.sync += z.create(a.s, a.e, a.m[3:-1]) + + # if a equal to -b return zero (+ve zero) + with m.Elif(s_nomatch & m_match & (a.e == b.e)): + m.next = "put_z" + m.d.sync += z.zero(0) + + # Denormalised Number 
checks + with m.Else(): + m.next = "align" + self.denormalise(m, a) + self.denormalise(m, b) + + # ****** + # align. + + with m.State("align"): + if not self.single_cycle: + # NOTE: this does *not* do single-cycle multi-shifting, + # it *STAYS* in the align state until exponents match + + # exponent of a greater than b: shift b down + with m.If(a.e > b.e): + m.d.sync += b.shift_down() + # exponent of b greater than a: shift a down + with m.Elif(a.e < b.e): + m.d.sync += a.shift_down() + # exponents equal: move to next stage. + with m.Else(): + m.next = "add_0" + else: + # This one however (single-cycle) will do the shift + # in one go. + + # XXX TODO: the shifter used here is quite expensive + # having only one would be better + + ediff = Signal((len(a.e), True), reset_less=True) + ediffr = Signal((len(a.e), True), reset_less=True) + m.d.comb += ediff.eq(a.e - b.e) + m.d.comb += ediffr.eq(b.e - a.e) + with m.If(ediff > 0): + m.d.sync += b.shift_down_multi(ediff) + # exponent of b greater than a: shift a down + with m.Elif(ediff < 0): + m.d.sync += a.shift_down_multi(ediffr) + + m.next = "add_0" + + # ****** + # First stage of add. covers same-sign (add) and subtract + # special-casing when mantissas are greater or equal, to + # give greatest accuracy. + + with m.State("add_0"): + m.next = "add_1" + m.d.sync += z.e.eq(a.e) + # same-sign (both negative or both positive) add mantissas + with m.If(a.s == b.s): + m.d.sync += [ + tot.eq(Cat(a.m, 0) + Cat(b.m, 0)), + z.s.eq(a.s) + ] + # a mantissa greater than b, use a + with m.Elif(a.m >= b.m): + m.d.sync += [ + tot.eq(Cat(a.m, 0) - Cat(b.m, 0)), + z.s.eq(a.s) + ] + # b mantissa greater than a, use b + with m.Else(): + m.d.sync += [ + tot.eq(Cat(b.m, 0) - Cat(a.m, 0)), + z.s.eq(b.s) + ] + + # ****** + # Second stage of add: preparation for normalisation. + # detects when tot sum is too big (tot[27] is kinda a carry bit) + + with m.State("add_1"): + m.next = "normalise_1" + # tot[27] gets set when the sum overflows. 
shift result down + with m.If(tot[-1]): + m.d.sync += [ + z.m.eq(tot[4:]), + of.m0.eq(tot[4]), + of.guard.eq(tot[3]), + of.round_bit.eq(tot[2]), + of.sticky.eq(tot[1] | tot[0]), + z.e.eq(z.e + 1) + ] + # tot[27] zero case + with m.Else(): + m.d.sync += [ + z.m.eq(tot[3:]), + of.m0.eq(tot[3]), + of.guard.eq(tot[2]), + of.round_bit.eq(tot[1]), + of.sticky.eq(tot[0]) + ] + + # ****** + # First stage of normalisation. + + with m.State("normalise_1"): + self.normalise_1(m, z, of, "normalise_2") + + # ****** + # Second stage of normalisation. + + with m.State("normalise_2"): + self.normalise_2(m, z, of, "round") + + # ****** + # rounding stage + + with m.State("round"): + self.roundz(m, z, of.roundz) + m.next = "corrections" + + # ****** + # correction stage + + with m.State("corrections"): + self.corrections(m, z, "pack") + + # ****** + # pack stage + + with m.State("pack"): + self.pack(m, z, "put_z") + + # ****** + # put_z stage + + with m.State("put_z"): + self.put_z(m, z, self.out_z, "get_a") + + return m + + +if __name__ == "__main__": + alu = FPADD(width=32) + main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) + + + # works... 
but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/add/fmul.py b/src/ieee754/add/fmul.py new file mode 100644 index 00000000..a2ba41e7 --- /dev/null +++ b/src/ieee754/add/fmul.py @@ -0,0 +1,172 @@ +from nmigen import Module, Signal, Cat, Mux, Array, Const +from nmigen.cli import main, verilog + +from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPState +from fpcommon.getop import FPGetOp +from singlepipe import eq + + +class FPMUL(FPBase): + + def __init__(self, width): + FPBase.__init__(self) + self.width = width + + self.in_a = FPOp(width) + self.in_b = FPOp(width) + self.out_z = FPOp(width) + + self.states = [] + + def add_state(self, state): + self.states.append(state) + return state + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for FPMUL + """ + m = Module() + + # Latches + a = FPNumIn(None, self.width, False) + b = FPNumIn(None, self.width, False) + z = FPNumOut(self.width, False) + + mw = (z.m_width)*2 - 1 + 3 # sticky/round/guard bits + (2*mant) - 1 + product = Signal(mw) + + of = Overflow() + m.submodules.of = of + m.submodules.a = a + m.submodules.b = b + m.submodules.z = z + + m.d.comb += a.v.eq(self.in_a.v) + m.d.comb += b.v.eq(self.in_b.v) + + with m.FSM() as fsm: + + # ****** + # gets operand a + + with m.State("get_a"): + res = self.get_op(m, self.in_a, a, "get_b") + m.d.sync += eq([a, self.in_a.ack], res) + + # ****** + # gets operand b + + with m.State("get_b"): + res = self.get_op(m, self.in_b, b, "special_cases") + m.d.sync += eq([b, self.in_b.ack], res) + + # ****** + # special cases + + with m.State("special_cases"): + #if a or b is NaN return NaN + with m.If(a.is_nan | b.is_nan): + m.next = "put_z" + m.d.sync += z.nan(1) + #if a is inf return inf + with m.Elif(a.is_inf): + m.next = "put_z" + m.d.sync += z.inf(a.s ^ b.s) + #if b is zero return NaN + with 
m.If(b.is_zero): + m.d.sync += z.nan(1) + #if b is inf return inf + with m.Elif(b.is_inf): + m.next = "put_z" + m.d.sync += z.inf(a.s ^ b.s) + #if a is zero return NaN + with m.If(a.is_zero): + m.next = "put_z" + m.d.sync += z.nan(1) + #if a is zero return zero + with m.Elif(a.is_zero): + m.next = "put_z" + m.d.sync += z.zero(a.s ^ b.s) + #if b is zero return zero + with m.Elif(b.is_zero): + m.next = "put_z" + m.d.sync += z.zero(a.s ^ b.s) + # Denormalised Number checks + with m.Else(): + m.next = "normalise_a" + self.denormalise(m, a) + self.denormalise(m, b) + + # ****** + # normalise_a + + with m.State("normalise_a"): + self.op_normalise(m, a, "normalise_b") + + # ****** + # normalise_b + + with m.State("normalise_b"): + self.op_normalise(m, b, "multiply_0") + + #multiply_0 + with m.State("multiply_0"): + m.next = "multiply_1" + m.d.sync += [ + z.s.eq(a.s ^ b.s), + z.e.eq(a.e + b.e + 1), + product.eq(a.m * b.m * 4) + ] + + #multiply_1 + with m.State("multiply_1"): + mw = z.m_width + m.next = "normalise_1" + m.d.sync += [ + z.m.eq(product[mw+2:]), + of.guard.eq(product[mw+1]), + of.round_bit.eq(product[mw]), + of.sticky.eq(product[0:mw] != 0) + ] + + # ****** + # First stage of normalisation. + with m.State("normalise_1"): + self.normalise_1(m, z, of, "normalise_2") + + # ****** + # Second stage of normalisation. 
+ + with m.State("normalise_2"): + self.normalise_2(m, z, of, "round") + + # ****** + # rounding stage + + with m.State("round"): + self.roundz(m, z, of.roundz) + m.next = "corrections" + + # ****** + # correction stage + + with m.State("corrections"): + self.corrections(m, z, "pack") + + # ****** + # pack stage + with m.State("pack"): + self.pack(m, z, "put_z") + + # ****** + # put_z stage + + with m.State("put_z"): + self.put_z(m, z, self.out_z, "get_a") + + return m + + +if __name__ == "__main__": + alu = FPMUL(width=32) + main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) diff --git a/src/ieee754/add/fpadd/__init__.py b/src/ieee754/add/fpadd/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ieee754/add/fpadd/add0.py b/src/ieee754/add/fpadd/add0.py new file mode 100644 index 00000000..76790fe2 --- /dev/null +++ b/src/ieee754/add/fpadd/add0.py @@ -0,0 +1,113 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Cat, Elaboratable +from nmigen.cli import main, verilog + +from fpbase import FPNumBase +from fpbase import FPState +from fpcommon.denorm import FPSCData + + +class FPAddStage0Data: + + def __init__(self, width, id_wid): + self.z = FPNumBase(width, False) + self.out_do_z = Signal(reset_less=True) + self.oz = Signal(width, reset_less=True) + self.tot = Signal(self.z.m_width + 4, reset_less=True) + self.mid = Signal(id_wid, reset_less=True) + + def eq(self, i): + return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz), + self.tot.eq(i.tot), self.mid.eq(i.mid)] + + +class FPAddStage0Mod(Elaboratable): + + def __init__(self, width, id_wid): + self.width = width + self.id_wid = id_wid + self.i = self.ispec() + self.o = self.ospec() + + def ispec(self): + return FPSCData(self.width, self.id_wid) + + def ospec(self): + return FPAddStage0Data(self.width, self.id_wid) + + def process(self, i): + return self.o + + def 
setup(self, m, i): + """ links module to inputs and outputs + """ + m.submodules.add0 = self + m.d.comb += self.i.eq(i) + + def elaborate(self, platform): + m = Module() + m.submodules.add0_in_a = self.i.a + m.submodules.add0_in_b = self.i.b + m.submodules.add0_out_z = self.o.z + + # store intermediate tests (and zero-extended mantissas) + seq = Signal(reset_less=True) + mge = Signal(reset_less=True) + am0 = Signal(len(self.i.a.m)+1, reset_less=True) + bm0 = Signal(len(self.i.b.m)+1, reset_less=True) + m.d.comb += [seq.eq(self.i.a.s == self.i.b.s), + mge.eq(self.i.a.m >= self.i.b.m), + am0.eq(Cat(self.i.a.m, 0)), + bm0.eq(Cat(self.i.b.m, 0)) + ] + # same-sign (both negative or both positive) add mantissas + with m.If(~self.i.out_do_z): + m.d.comb += self.o.z.e.eq(self.i.a.e) + with m.If(seq): + m.d.comb += [ + self.o.tot.eq(am0 + bm0), + self.o.z.s.eq(self.i.a.s) + ] + # a mantissa greater than b, use a + with m.Elif(mge): + m.d.comb += [ + self.o.tot.eq(am0 - bm0), + self.o.z.s.eq(self.i.a.s) + ] + # b mantissa greater than a, use b + with m.Else(): + m.d.comb += [ + self.o.tot.eq(bm0 - am0), + self.o.z.s.eq(self.i.b.s) + ] + + m.d.comb += self.o.oz.eq(self.i.oz) + m.d.comb += self.o.out_do_z.eq(self.i.out_do_z) + m.d.comb += self.o.mid.eq(self.i.mid) + return m + + +class FPAddStage0(FPState): + """ First stage of add. covers same-sign (add) and subtract + special-casing when mantissas are greater or equal, to + give greatest accuracy. 
+ """ + + def __init__(self, width, id_wid): + FPState.__init__(self, "add_0") + self.mod = FPAddStage0Mod(width) + self.o = self.mod.ospec() + + def setup(self, m, i): + """ links module to inputs and outputs + """ + self.mod.setup(m, i) + + # NOTE: these could be done as combinatorial (merge add0+add1) + m.d.sync += self.o.eq(self.mod.o) + + def action(self, m): + m.next = "add_1" diff --git a/src/ieee754/add/fpadd/add1.py b/src/ieee754/add/fpadd/add1.py new file mode 100644 index 00000000..679f5176 --- /dev/null +++ b/src/ieee754/add/fpadd/add1.py @@ -0,0 +1,95 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Elaboratable +from nmigen.cli import main, verilog +from math import log + +from fpbase import FPState +from fpcommon.postcalc import FPAddStage1Data +from fpadd.add0 import FPAddStage0Data + + +class FPAddStage1Mod(FPState, Elaboratable): + """ Second stage of add: preparation for normalisation. + detects when tot sum is too big (tot[27] is kinda a carry bit) + """ + + def __init__(self, width, id_wid): + self.width = width + self.id_wid = id_wid + self.i = self.ispec() + self.o = self.ospec() + + def ispec(self): + return FPAddStage0Data(self.width, self.id_wid) + + def ospec(self): + return FPAddStage1Data(self.width, self.id_wid) + + def process(self, i): + return self.o + + def setup(self, m, i): + """ links module to inputs and outputs + """ + m.submodules.add1 = self + m.submodules.add1_out_overflow = self.o.of + + m.d.comb += self.i.eq(i) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.o.z.eq(self.i.z) + # tot[-1] (MSB) gets set when the sum overflows. 
shift result down + with m.If(~self.i.out_do_z): + with m.If(self.i.tot[-1]): + m.d.comb += [ + self.o.z.m.eq(self.i.tot[4:]), + self.o.of.m0.eq(self.i.tot[4]), + self.o.of.guard.eq(self.i.tot[3]), + self.o.of.round_bit.eq(self.i.tot[2]), + self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]), + self.o.z.e.eq(self.i.z.e + 1) + ] + # tot[-1] (MSB) zero case + with m.Else(): + m.d.comb += [ + self.o.z.m.eq(self.i.tot[3:]), + self.o.of.m0.eq(self.i.tot[3]), + self.o.of.guard.eq(self.i.tot[2]), + self.o.of.round_bit.eq(self.i.tot[1]), + self.o.of.sticky.eq(self.i.tot[0]) + ] + + m.d.comb += self.o.out_do_z.eq(self.i.out_do_z) + m.d.comb += self.o.oz.eq(self.i.oz) + m.d.comb += self.o.mid.eq(self.i.mid) + + return m + + +class FPAddStage1(FPState): + + def __init__(self, width, id_wid): + FPState.__init__(self, "add_1") + self.mod = FPAddStage1Mod(width) + self.out_z = FPNumBase(width, False) + self.out_of = Overflow() + self.norm_stb = Signal() + + def setup(self, m, i): + """ links module to inputs and outputs + """ + self.mod.setup(m, i) + + m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state + + m.d.sync += self.out_of.eq(self.mod.out_of) + m.d.sync += self.out_z.eq(self.mod.out_z) + m.d.sync += self.norm_stb.eq(1) + + def action(self, m): + m.next = "normalise_1" + diff --git a/src/ieee754/add/fpadd/addstages.py b/src/ieee754/add/fpadd/addstages.py new file mode 100644 index 00000000..f5703aec --- /dev/null +++ b/src/ieee754/add/fpadd/addstages.py @@ -0,0 +1,55 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module +from nmigen.cli import main, verilog + +from singlepipe import (StageChain, SimpleHandshake, + PassThroughStage) + +from fpbase import FPState +from fpcommon.denorm import FPSCData +from fpcommon.postcalc import FPAddStage1Data +from fpadd.align import FPAddAlignSingleMod +from fpadd.add0 import FPAddStage0Mod +from fpadd.add1 import FPAddStage1Mod + + +class 
FPAddAlignSingleAdd(FPState, SimpleHandshake): + + def __init__(self, width, id_wid): + FPState.__init__(self, "align") + self.width = width + self.id_wid = id_wid + SimpleHandshake.__init__(self, self) # pipeline is its own stage + self.a1o = self.ospec() + + def ispec(self): + return FPSCData(self.width, self.id_wid) + + def ospec(self): + return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec + + def setup(self, m, i): + """ links module to inputs and outputs + """ + + # chain AddAlignSingle, AddStage0 and AddStage1 + mod = FPAddAlignSingleMod(self.width, self.id_wid) + a0mod = FPAddStage0Mod(self.width, self.id_wid) + a1mod = FPAddStage1Mod(self.width, self.id_wid) + + chain = StageChain([mod, a0mod, a1mod]) + chain.setup(m, i) + + self.o = a1mod.o + + def process(self, i): + return self.o + + def action(self, m): + m.d.sync += self.a1o.eq(self.process(None)) + m.next = "normalise_1" + + diff --git a/src/ieee754/add/fpadd/align.py b/src/ieee754/add/fpadd/align.py new file mode 100644 index 00000000..9837a0b8 --- /dev/null +++ b/src/ieee754/add/fpadd/align.py @@ -0,0 +1,211 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal +from nmigen.cli import main, verilog + +from fpbase import FPNumOut, FPNumIn, FPNumBase +from fpbase import MultiShiftRMerge +from fpbase import FPState +from fpcommon.denorm import FPSCData + + +class FPNumIn2Ops: + + def __init__(self, width, id_wid): + self.a = FPNumIn(None, width) + self.b = FPNumIn(None, width) + self.z = FPNumOut(width, False) + self.out_do_z = Signal(reset_less=True) + self.oz = Signal(width, reset_less=True) + self.mid = Signal(id_wid, reset_less=True) + + def eq(self, i): + return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz), + self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)] + + + +class FPAddAlignMultiMod(FPState): + + def __init__(self, width): + self.in_a = FPNumBase(width) + self.in_b = 
FPNumBase(width) + self.out_a = FPNumIn(None, width) + self.out_b = FPNumIn(None, width) + self.exp_eq = Signal(reset_less=True) + + def elaborate(self, platform): + # This one however (single-cycle) will do the shift + # in one go. + + m = Module() + + m.submodules.align_in_a = self.in_a + m.submodules.align_in_b = self.in_b + m.submodules.align_out_a = self.out_a + m.submodules.align_out_b = self.out_b + + # NOTE: this does *not* do single-cycle multi-shifting, + # it *STAYS* in the align state until exponents match + + # exponent of a greater than b: shift b down + m.d.comb += self.exp_eq.eq(0) + m.d.comb += self.out_a.eq(self.in_a) + m.d.comb += self.out_b.eq(self.in_b) + agtb = Signal(reset_less=True) + altb = Signal(reset_less=True) + m.d.comb += agtb.eq(self.in_a.e > self.in_b.e) + m.d.comb += altb.eq(self.in_a.e < self.in_b.e) + with m.If(agtb): + m.d.comb += self.out_b.shift_down(self.in_b) + # exponent of b greater than a: shift a down + with m.Elif(altb): + m.d.comb += self.out_a.shift_down(self.in_a) + # exponents equal: move to next stage. 
+ with m.Else(): + m.d.comb += self.exp_eq.eq(1) + return m + + +class FPAddAlignMulti(FPState): + + def __init__(self, width, id_wid): + FPState.__init__(self, "align") + self.mod = FPAddAlignMultiMod(width) + self.out_a = FPNumIn(None, width) + self.out_b = FPNumIn(None, width) + self.exp_eq = Signal(reset_less=True) + + def setup(self, m, in_a, in_b): + """ links module to inputs and outputs + """ + m.submodules.align = self.mod + m.d.comb += self.mod.in_a.eq(in_a) + m.d.comb += self.mod.in_b.eq(in_b) + m.d.comb += self.exp_eq.eq(self.mod.exp_eq) + m.d.sync += self.out_a.eq(self.mod.out_a) + m.d.sync += self.out_b.eq(self.mod.out_b) + + def action(self, m): + with m.If(self.exp_eq): + m.next = "add_0" + + +class FPAddAlignSingleMod: + + def __init__(self, width, id_wid): + self.width = width + self.id_wid = id_wid + self.i = self.ispec() + self.o = self.ospec() + + def ispec(self): + return FPSCData(self.width, self.id_wid) + + def ospec(self): + return FPNumIn2Ops(self.width, self.id_wid) + + def process(self, i): + return self.o + + def setup(self, m, i): + """ links module to inputs and outputs + """ + m.submodules.align = self + m.d.comb += self.i.eq(i) + + def elaborate(self, platform): + """ Aligns A against B or B against A, depending on which has the + greater exponent. This is done in a *single* cycle using + variable-width bit-shift + + the shifter used here is quite expensive in terms of gates. 
+ Mux A or B in (and out) into temporaries, as only one of them + needs to be aligned against the other + """ + m = Module() + + m.submodules.align_in_a = self.i.a + m.submodules.align_in_b = self.i.b + m.submodules.align_out_a = self.o.a + m.submodules.align_out_b = self.o.b + + # temporary (muxed) input and output to be shifted + t_inp = FPNumBase(self.width) + t_out = FPNumIn(None, self.width) + espec = (len(self.i.a.e), True) + msr = MultiShiftRMerge(self.i.a.m_width, espec) + m.submodules.align_t_in = t_inp + m.submodules.align_t_out = t_out + m.submodules.multishift_r = msr + + ediff = Signal(espec, reset_less=True) + ediffr = Signal(espec, reset_less=True) + tdiff = Signal(espec, reset_less=True) + elz = Signal(reset_less=True) + egz = Signal(reset_less=True) + + # connect multi-shifter to t_inp/out mantissa (and tdiff) + m.d.comb += msr.inp.eq(t_inp.m) + m.d.comb += msr.diff.eq(tdiff) + m.d.comb += t_out.m.eq(msr.m) + m.d.comb += t_out.e.eq(t_inp.e + tdiff) + m.d.comb += t_out.s.eq(t_inp.s) + + m.d.comb += ediff.eq(self.i.a.e - self.i.b.e) + m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e) + m.d.comb += elz.eq(self.i.a.e < self.i.b.e) + m.d.comb += egz.eq(self.i.a.e > self.i.b.e) + + # default: A-exp == B-exp, A and B untouched (fall through) + m.d.comb += self.o.a.eq(self.i.a) + m.d.comb += self.o.b.eq(self.i.b) + # only one shifter (muxed) + #m.d.comb += t_out.shift_down_multi(tdiff, t_inp) + # exponent of a greater than b: shift b down + with m.If(~self.i.out_do_z): + with m.If(egz): + m.d.comb += [t_inp.eq(self.i.b), + tdiff.eq(ediff), + self.o.b.eq(t_out), + self.o.b.s.eq(self.i.b.s), # whoops forgot sign + ] + # exponent of b greater than a: shift a down + with m.Elif(elz): + m.d.comb += [t_inp.eq(self.i.a), + tdiff.eq(ediffr), + self.o.a.eq(t_out), + self.o.a.s.eq(self.i.a.s), # whoops forgot sign + ] + + m.d.comb += self.o.mid.eq(self.i.mid) + m.d.comb += self.o.z.eq(self.i.z) + m.d.comb += self.o.out_do_z.eq(self.i.out_do_z) + m.d.comb += 
self.o.oz.eq(self.i.oz)

        return m


class FPAddAlignSingle(FPState):
    """ FSM-style wrapper around FPAddAlignSingleMod: registers the
        aligned operands and advances the state machine to "add_0".

        NOTE(review): FPAddAlignSingleMod (above) exposes its outputs as
        `self.o` (an FPNumIn2Ops with .a/.b); it defines no `out_a`/`out_b`
        attributes, so the sync assignments below look stale -- confirm
        against the FSM code path before relying on this class.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)   # registered aligned operand A
        self.out_b = FPNumIn(None, width)   # registered aligned operand B

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.out_a)
        m.d.sync += self.out_b.eq(self.mod.out_b)

    def action(self, m):
        # alignment is single-cycle: move straight on to the add stage
        m.next = "add_0"


diff --git a/src/ieee754/add/fpadd/pipeline.py b/src/ieee754/add/fpadd/pipeline.py
new file mode 100644
index 00000000..e244ee60
--- /dev/null
+++ b/src/ieee754/add/fpadd/pipeline.py
@@ -0,0 +1,59 @@
# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
# 2013-12-12

from nmigen import Module
from nmigen.cli import main, verilog

from singlepipe import (ControlBase, SimpleHandshake, PassThroughStage)
from multipipe import CombMuxOutPipe
from multipipe import PriorityCombMuxInPipe

from fpcommon.getop import FPADDBaseData
from fpcommon.denorm import FPSCData
from fpcommon.pack import FPPackData
from fpcommon.normtopack import FPNormToPack
from fpadd.specialcases import FPAddSpecialCasesDeNorm
from fpadd.addstages import FPAddAlignSingleAdd

from concurrentunit import ReservationStations, num_bits


class FPADDBasePipe(ControlBase):
    """ 3-stage FP add pipeline:

        1. special-cases detection plus denormalisation
        2. alignment plus mantissa add
        3. normalisation, rounding and packing
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # chain the three stages' ready/valid handshakes together
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m


class FPADDMuxInOut(ReservationStations):
    """ 
Reservation-Station version of FPADD pipeline. + + * fan-in on inputs (an array of FPADDBaseData: a,b,mid) + * 3-stage adder pipeline + * fan-out on outputs (an array of FPPackData: z,mid) + + Fan-in and Fan-out are combinatorial. + """ + def __init__(self, width, num_rows): + self.width = width + self.id_wid = num_bits(width) + self.alu = FPADDBasePipe(width, self.id_wid) + ReservationStations.__init__(self, num_rows) + + def i_specfn(self): + return FPADDBaseData(self.width, self.id_wid) + + def o_specfn(self): + return FPPackData(self.width, self.id_wid) diff --git a/src/ieee754/add/fpadd/specialcases.py b/src/ieee754/add/fpadd/specialcases.py new file mode 100644 index 00000000..6f9d1a08 --- /dev/null +++ b/src/ieee754/add/fpadd/specialcases.py @@ -0,0 +1,223 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Cat, Const +from nmigen.cli import main, verilog +from math import log + +from fpbase import FPNumDecode +from singlepipe import SimpleHandshake, StageChain + +from fpbase import FPState, FPID +from fpcommon.getop import FPADDBaseData +from fpcommon.denorm import (FPSCData, FPAddDeNormMod) + + +class FPAddSpecialCasesMod: + """ special cases: NaNs, infs, zeros, denormalised + NOTE: some of these are unique to add. 
see "Special Operations" + https://steve.hollasch.net/cgindex/coding/ieeefloat.html + """ + + def __init__(self, width, id_wid): + self.width = width + self.id_wid = id_wid + self.i = self.ispec() + self.o = self.ospec() + + def ispec(self): + return FPADDBaseData(self.width, self.id_wid) + + def ospec(self): + return FPSCData(self.width, self.id_wid) + + def setup(self, m, i): + """ links module to inputs and outputs + """ + m.submodules.specialcases = self + m.d.comb += self.i.eq(i) + + def process(self, i): + return self.o + + def elaborate(self, platform): + m = Module() + + m.submodules.sc_out_z = self.o.z + + # decode: XXX really should move to separate stage + a1 = FPNumDecode(None, self.width) + b1 = FPNumDecode(None, self.width) + m.submodules.sc_decode_a = a1 + m.submodules.sc_decode_b = b1 + m.d.comb += [a1.v.eq(self.i.a), + b1.v.eq(self.i.b), + self.o.a.eq(a1), + self.o.b.eq(b1) + ] + + s_nomatch = Signal(reset_less=True) + m.d.comb += s_nomatch.eq(a1.s != b1.s) + + m_match = Signal(reset_less=True) + m.d.comb += m_match.eq(a1.m == b1.m) + + e_match = Signal(reset_less=True) + m.d.comb += e_match.eq(a1.e == b1.e) + + aeqmb = Signal(reset_less=True) + m.d.comb += aeqmb.eq(s_nomatch & m_match & e_match) + + abz = Signal(reset_less=True) + m.d.comb += abz.eq(a1.is_zero & b1.is_zero) + + abnan = Signal(reset_less=True) + m.d.comb += abnan.eq(a1.is_nan | b1.is_nan) + + bexp128s = Signal(reset_less=True) + m.d.comb += bexp128s.eq(b1.exp_128 & s_nomatch) + + # if a is NaN or b is NaN return NaN + with m.If(abnan): + m.d.comb += self.o.out_do_z.eq(1) + m.d.comb += self.o.z.nan(0) + + # XXX WEIRDNESS for FP16 non-canonical NaN handling + # under review + + ## if a is zero and b is NaN return -b + #with m.If(a.is_zero & (a.s==0) & b.is_nan): + # m.d.comb += self.o.out_do_z.eq(1) + # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0])) + + ## if b is zero and a is NaN return -a + #with m.Elif(b.is_zero & (b.s==0) & a.is_nan): + # m.d.comb += 
self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            with m.If(bexp128s):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        with m.Elif(abz):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(aeqmb):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)

        # oz mirrors the packed special-case result; mid carries the
        # reservation-station id through unconditionally
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m


class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        NOTE(review): this legacy FSM wrapper looks out of step with
        FPAddSpecialCasesMod as defined above: the Mod constructor
        requires (width, id_wid) but only width is passed; Mod.setup
        takes (m, i) yet three arguments are passed here; and the Mod
        has no `out_z` attribute (its output bundle is `self.o`).
        Confirm before using this class.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)  # "result decided early" flag

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i, self.out_do_z)
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        self.idsync(m)
        # early-out: a special case fully determined the result
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"


class FPAddSpecialCasesDeNorm(FPState, SimpleHandshake):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add. see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Pipeline stage combining special-case detection and
        denormalisation into a single SimpleHandshake stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width = width
        self.id_wid = id_wid
        SimpleHandshake.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)  # SpecialCases ispec

    def ospec(self):
        return FPSCData(self.width, self.id_wid)  # DeNorm ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain the two combinatorial sub-modules back-to-back
        smod = FPAddSpecialCasesMod(self.width, self.id_wid)
        dmod = FPAddDeNormMod(self.width, self.id_wid)

        chain = StageChain([smod, dmod])
        chain.setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = smod.o.out_do_z

        # output of the chain is the denorm module's output
        self.o = dmod.o

    def process(self, i):
        return self.o

    def action(self, m):
        # for break-out (early-out)
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"


diff --git a/src/ieee754/add/fpadd/statemachine.py b/src/ieee754/add/fpadd/statemachine.py
new file mode 100644
index 00000000..4418b3fa
--- 
/dev/null
+++ b/src/ieee754/add/fpadd/statemachine.py
@@ -0,0 +1,376 @@
# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
# 2013-12-12

from nmigen import Module, Signal, Cat, Mux, Array, Const
from nmigen.cli import main, verilog
from math import log

from fpbase import FPOpIn, FPOpOut
from fpbase import Trigger
from singlepipe import (StageChain, SimpleHandshake)

from fpbase import FPState, FPID
from fpcommon.getop import (FPGetOp, FPADDBaseData, FPGet2Op)
from fpcommon.denorm import (FPSCData, FPAddDeNorm)
from fpcommon.postcalc import FPAddStage1Data
from fpcommon.postnormalise import (FPNorm1Data,
                                    FPNorm1Single, FPNorm1Multi)
from fpcommon.roundz import (FPRoundData, FPRound)
from fpcommon.corrections import FPCorrections
from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
from fpcommon.normtopack import FPNormToPack
from fpcommon.putz import (FPPutZ, FPPutZIdx)

from fpadd.specialcases import (FPAddSpecialCases, FPAddSpecialCasesDeNorm)
from fpadd.align import (FPAddAlignMulti, FPAddAlignSingle)
from fpadd.add0 import (FPAddStage0Data, FPAddStage0)
from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
from fpadd.addstages import FPAddAlignSingleAdd


class FPOpData:
    """ output bundle of the FP adder: a handshaked result operand `z`
        plus the reservation-station id `mid` routed alongside it.
    """
    def __init__(self, width, id_wid):
        self.z = FPOpOut(width)        # handshaked output operand
        self.z.data_o = Signal(width)  # backing data signal for z
        self.mid = Signal(id_wid, reset_less=True)  # reservation-station id

    def __iter__(self):
        yield self.z
        yield self.mid

    def eq(self, i):
        # copy both the operand and its routing id from another bundle
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    def ports(self):
        return list(self)


class FPADDBaseMod:

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754. 
supported: 16, 32, 64 + * id_wid: an identifier that is sync-connected to the input + * single_cycle: True indicates each stage to complete in 1 clock + * compact: True indicates a reduced number of stages + """ + self.width = width + self.id_wid = id_wid + self.single_cycle = single_cycle + self.compact = compact + + self.in_t = Trigger() + self.i = self.ispec() + self.o = self.ospec() + + self.states = [] + + def ispec(self): + return FPADDBaseData(self.width, self.id_wid) + + def ospec(self): + return FPOpData(self.width, self.id_wid) + + def add_state(self, state): + self.states.append(state) + return state + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for FPAdd + """ + m = Module() + m.submodules.out_z = self.o.z + m.submodules.in_t = self.in_t + if self.compact: + self.get_compact_fragment(m, platform) + else: + self.get_longer_fragment(m, platform) + + with m.FSM() as fsm: + + for state in self.states: + with m.State(state.state_from): + state.action(m) + + return m + + def get_longer_fragment(self, m, platform=None): + + get = self.add_state(FPGet2Op("get_ops", "special_cases", + self.width)) + get.setup(m, self.i) + a = get.out_op1 + b = get.out_op2 + get.trigger_setup(m, self.in_t.stb, self.in_t.ack) + + sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid)) + sc.setup(m, a, b, self.in_mid) + + dn = self.add_state(FPAddDeNorm(self.width, self.id_wid)) + dn.setup(m, a, b, sc.in_mid) + + if self.single_cycle: + alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid)) + alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) + else: + alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid)) + alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) + + add0 = self.add_state(FPAddStage0(self.width, self.id_wid)) + add0.setup(m, alm.out_a, alm.out_b, alm.in_mid) + + add1 = self.add_state(FPAddStage1(self.width, self.id_wid)) + add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid) + + if self.single_cycle: + n1 = 
self.add_state(FPNorm1Single(self.width, self.id_wid)) + n1.setup(m, add1.out_z, add1.out_of, add0.in_mid) + else: + n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid)) + n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid) + + rn = self.add_state(FPRound(self.width, self.id_wid)) + rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid) + + cor = self.add_state(FPCorrections(self.width, self.id_wid)) + cor.setup(m, rn.out_z, rn.in_mid) + + pa = self.add_state(FPPack(self.width, self.id_wid)) + pa.setup(m, cor.out_z, rn.in_mid) + + ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z, + pa.in_mid, self.out_mid)) + + pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z, + pa.in_mid, self.out_mid)) + + def get_compact_fragment(self, m, platform=None): + + get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid) + sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid) + alm = FPAddAlignSingleAdd(self.width, self.id_wid) + n1 = FPNormToPack(self.width, self.id_wid) + + get.trigger_setup(m, self.in_t.stb, self.in_t.ack) + + chainlist = [get, sc, alm, n1] + chain = StageChain(chainlist, specallocate=True) + chain.setup(m, self.i) + + for mod in chainlist: + sc = self.add_state(mod) + + ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o, + n1.out_z.mid, self.o.mid)) + + #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o, + # sc.o.mid, self.o.mid)) + + +class FPADDBase(FPState): + + def __init__(self, width, id_wid=None, single_cycle=False): + """ IEEE754 FP Add + + * width: bit-width of IEEE754. 
supported: 16, 32, 64 + * id_wid: an identifier that is sync-connected to the input + * single_cycle: True indicates each stage to complete in 1 clock + """ + FPState.__init__(self, "fpadd") + self.width = width + self.single_cycle = single_cycle + self.mod = FPADDBaseMod(width, id_wid, single_cycle) + self.o = self.ospec() + + self.in_t = Trigger() + self.i = self.ispec() + + self.z_done = Signal(reset_less=True) # connects to out_z Strobe + self.in_accept = Signal(reset_less=True) + self.add_stb = Signal(reset_less=True) + self.add_ack = Signal(reset=0, reset_less=True) + + def ispec(self): + return self.mod.ispec() + + def ospec(self): + return self.mod.ospec() + + def setup(self, m, i, add_stb, in_mid): + m.d.comb += [self.i.eq(i), + self.mod.i.eq(self.i), + self.z_done.eq(self.mod.o.z.trigger), + #self.add_stb.eq(add_stb), + self.mod.in_t.stb.eq(self.in_t.stb), + self.in_t.ack.eq(self.mod.in_t.ack), + self.o.mid.eq(self.mod.o.mid), + self.o.z.v.eq(self.mod.o.z.v), + self.o.z.valid_o.eq(self.mod.o.z.valid_o), + self.mod.o.z.ready_i.eq(self.o.z.ready_i_test), + ] + + m.d.sync += self.add_stb.eq(add_stb) + m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state + m.d.sync += self.o.z.ready_i.eq(0) # likewise + #m.d.sync += self.in_t.stb.eq(0) + + m.submodules.fpadd = self.mod + + def action(self, m): + + # in_accept is set on incoming strobe HIGH and ack LOW. + m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb)) + + #with m.If(self.in_t.ack): + # m.d.sync += self.in_t.stb.eq(0) + with m.If(~self.z_done): + # not done: test for accepting an incoming operand pair + with m.If(self.in_accept): + m.d.sync += [ + self.add_ack.eq(1), # acknowledge receipt... 
+ self.in_t.stb.eq(1), # initiate add + ] + with m.Else(): + m.d.sync += [self.add_ack.eq(0), + self.in_t.stb.eq(0), + self.o.z.ready_i.eq(1), + ] + with m.Else(): + # done: acknowledge, and write out id and value + m.d.sync += [self.add_ack.eq(1), + self.in_t.stb.eq(0) + ] + m.next = "put_z" + + return + + if self.in_mid is not None: + m.d.sync += self.out_mid.eq(self.mod.out_mid) + + m.d.sync += [ + self.out_z.v.eq(self.mod.out_z.v) + ] + # move to output state on detecting z ack + with m.If(self.out_z.trigger): + m.d.sync += self.out_z.stb.eq(0) + m.next = "put_z" + with m.Else(): + m.d.sync += self.out_z.stb.eq(1) + + +class FPADD(FPID): + """ FPADD: stages as follows: + + FPGetOp (a) + | + FPGetOp (b) + | + FPAddBase---> FPAddBaseMod + | | + PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ + + FPAddBase is tricky: it is both a stage and *has* stages. + Connection to FPAddBaseMod therefore requires an in stb/ack + and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp + needs to be the thing that raises the incoming stb. + """ + + def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2): + """ IEEE754 FP Add + + * width: bit-width of IEEE754. 
supported: 16, 32, 64 + * id_wid: an identifier that is sync-connected to the input + * single_cycle: True indicates each stage to complete in 1 clock + """ + self.width = width + self.id_wid = id_wid + self.single_cycle = single_cycle + + #self.out_z = FPOp(width) + self.ids = FPID(id_wid) + + rs = [] + for i in range(rs_sz): + in_a = FPOpIn(width) + in_b = FPOpIn(width) + in_a.data_i = Signal(width) + in_b.data_i = Signal(width) + in_a.name = "in_a_%d" % i + in_b.name = "in_b_%d" % i + rs.append((in_a, in_b)) + self.rs = Array(rs) + + res = [] + for i in range(rs_sz): + out_z = FPOpOut(width) + out_z.data_o = Signal(width) + out_z.name = "out_z_%d" % i + res.append(out_z) + self.res = Array(res) + + self.states = [] + + def add_state(self, state): + self.states.append(state) + return state + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for FPAdd + """ + m = Module() + #m.submodules += self.rs + + in_a = self.rs[0][0] + in_b = self.rs[0][1] + + geta = self.add_state(FPGetOp("get_a", "get_b", + in_a, self.width)) + geta.setup(m, in_a) + a = geta.out_op + + getb = self.add_state(FPGetOp("get_b", "fpadd", + in_b, self.width)) + getb.setup(m, in_b) + b = getb.out_op + + ab = FPADDBase(self.width, self.id_wid, self.single_cycle) + ab = self.add_state(ab) + abd = ab.ispec() # create an input spec object for FPADDBase + m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)] + ab.setup(m, abd, getb.out_decode, self.ids.in_mid) + o = ab.o + + pz = self.add_state(FPPutZIdx("put_z", o.z, self.res, + o.mid, "get_a")) + + with m.FSM() as fsm: + + for state in self.states: + with m.State(state.state_from): + state.action(m) + + return m + + +if __name__ == "__main__": + if True: + alu = FPADD(width=32, id_wid=5, single_cycle=True) + main(alu, ports=alu.rs[0][0].ports() + \ + alu.rs[0][1].ports() + \ + alu.res[0].ports() + \ + [alu.ids.in_mid, alu.ids.out_mid]) + else: + alu = FPADDBase(width=32, id_wid=5, single_cycle=True) + main(alu, 
ports=[alu.in_a, alu.in_b] + \ + alu.in_t.ports() + \ + alu.out_z.ports() + \ + [alu.in_mid, alu.out_mid]) + + + # works... but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/add/fpbase.py b/src/ieee754/add/fpbase.py new file mode 100644 index 00000000..f4908592 --- /dev/null +++ b/src/ieee754/add/fpbase.py @@ -0,0 +1,733 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable +from math import log +from operator import or_ +from functools import reduce + +from singlepipe import PrevControl, NextControl +from pipeline import ObjectProxy + + +class MultiShiftR: + + def __init__(self, width): + self.width = width + self.smax = int(log(width) / log(2)) + self.i = Signal(width, reset_less=True) + self.s = Signal(self.smax, reset_less=True) + self.o = Signal(width, reset_less=True) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.o.eq(self.i >> self.s) + return m + + +class MultiShift: + """ Generates variable-length single-cycle shifter from a series + of conditional tests on each bit of the left/right shift operand. + Each bit tested produces output shifted by that number of bits, + in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set + shifts by 2 bits, each partial result cascading to the next Mux. + + Could be adapted to do arithmetic shift by taking copies of the + MSB instead of zeros. 
+ """ + + def __init__(self, width): + self.width = width + self.smax = int(log(width) / log(2)) + + def lshift(self, op, s): + res = op << s + return res[:len(op)] + res = op + for i in range(self.smax): + zeros = [0] * (1<> s + return res[:len(op)] + res = op + for i in range(self.smax): + zeros = [0] * (1< 0) + m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0) + m.d.comb += self.exp_gt127.eq(self.e > self.P127) + m.d.comb += self.exp_n127.eq(self.e == self.N127) + m.d.comb += self.exp_n126.eq(self.e == self.N126) + m.d.comb += self.m_zero.eq(self.m == self.mzero) + m.d.comb += self.m_msbzero.eq(self.m[self.e_start] == 0) + + return m + + def _is_nan(self): + return (self.exp_128) & (~self.m_zero) + + def _is_inf(self): + return (self.exp_128) & (self.m_zero) + + def _is_zero(self): + return (self.exp_n127) & (self.m_zero) + + def _is_overflowed(self): + return self.exp_gt127 + + def _is_denormalised(self): + return (self.exp_n126) & (self.m_msbzero) + + def __iter__(self): + yield self.s + yield self.e + yield self.m + + def eq(self, inp): + return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)] + + +class FPNumOut(FPNumBase): + """ Floating-point Number Class + + Contains signals for an incoming copy of the value, decoded into + sign / exponent / mantissa. + Also contains encoding functions, creation and recognition of + zero, NaN and inf (all signed) + + Four extra bits are included in the mantissa: the top bit + (m[-1]) is effectively a carry-overflow. 
The other three are + guard (m[2]), round (m[1]), and sticky (m[0]) + """ + def __init__(self, width, m_extra=True): + FPNumBase.__init__(self, width, m_extra) + + def elaborate(self, platform): + m = FPNumBase.elaborate(self, platform) + + return m + + def create(self, s, e, m): + """ creates a value from sign / exponent / mantissa + + bias is added here, to the exponent + """ + return [ + self.v[-1].eq(s), # sign + self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias) + self.v[0:self.e_start].eq(m) # mantissa + ] + + def nan(self, s): + return self.create(s, self.P128, 1<<(self.e_start-1)) + + def inf(self, s): + return self.create(s, self.P128, 0) + + def zero(self, s): + return self.create(s, self.N127, 0) + + def create2(self, s, e, m): + """ creates a value from sign / exponent / mantissa + + bias is added here, to the exponent + """ + e = e + self.P127 # exp (add on bias) + return Cat(m[0:self.e_start], + e[0:self.e_end-self.e_start], + s) + + def nan2(self, s): + return self.create2(s, self.P128, self.msb1) + + def inf2(self, s): + return self.create2(s, self.P128, self.mzero) + + def zero2(self, s): + return self.create2(s, self.N127, self.mzero) + + +class MultiShiftRMerge(Elaboratable): + """ shifts down (right) and merges lower bits into m[0]. 
+ m[0] is the "sticky" bit, basically + """ + def __init__(self, width, s_max=None): + if s_max is None: + s_max = int(log(width) / log(2)) + self.smax = s_max + self.m = Signal(width, reset_less=True) + self.inp = Signal(width, reset_less=True) + self.diff = Signal(s_max, reset_less=True) + self.width = width + + def elaborate(self, platform): + m = Module() + + rs = Signal(self.width, reset_less=True) + m_mask = Signal(self.width, reset_less=True) + smask = Signal(self.width, reset_less=True) + stickybit = Signal(reset_less=True) + maxslen = Signal(self.smax, reset_less=True) + maxsleni = Signal(self.smax, reset_less=True) + + sm = MultiShift(self.width-1) + m0s = Const(0, self.width-1) + mw = Const(self.width-1, len(self.diff)) + m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)), + maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)), + ] + + m.d.comb += [ + # shift mantissa by maxslen, mask by inverse + rs.eq(sm.rshift(self.inp[1:], maxslen)), + m_mask.eq(sm.rshift(~m0s, maxsleni)), + smask.eq(self.inp[1:] & m_mask), + # sticky bit combines all mask (and mantissa low bit) + stickybit.eq(smask.bool() | self.inp[0]), + # mantissa result contains m[0] already. + self.m.eq(Cat(stickybit, rs)) + ] + return m + + +class FPNumShift(FPNumBase, Elaboratable): + """ Floating-point Number Class for shifting + """ + def __init__(self, mainm, op, inv, width, m_extra=True): + FPNumBase.__init__(self, width, m_extra) + self.latch_in = Signal() + self.mainm = mainm + self.inv = inv + self.op = op + + def elaborate(self, platform): + m = FPNumBase.elaborate(self, platform) + + m.d.comb += self.s.eq(op.s) + m.d.comb += self.e.eq(op.e) + m.d.comb += self.m.eq(op.m) + + with self.mainm.State("align"): + with m.If(self.e < self.inv.e): + m.d.sync += self.shift_down() + + return m + + def shift_down(self, inp): + """ shifts a mantissa down by one. 
exponent is increased to compensate + + accuracy is lost as a result in the mantissa however there are 3 + guard bits (the latter of which is the "sticky" bit) + """ + return [self.e.eq(inp.e + 1), + self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0)) + ] + + def shift_down_multi(self, diff): + """ shifts a mantissa down. exponent is increased to compensate + + accuracy is lost as a result in the mantissa however there are 3 + guard bits (the latter of which is the "sticky" bit) + + this code works by variable-shifting the mantissa by up to + its maximum bit-length: no point doing more (it'll still be + zero). + + the sticky bit is computed by shifting a batch of 1s by + the same amount, which will introduce zeros. it's then + inverted and used as a mask to get the LSBs of the mantissa. + those are then |'d into the sticky bit. + """ + sm = MultiShift(self.width) + mw = Const(self.m_width-1, len(diff)) + maxslen = Mux(diff > mw, mw, diff) + rs = sm.rshift(self.m[1:], maxslen) + maxsleni = mw - maxslen + m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert + + stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0] + return [self.e.eq(self.e + diff), + self.m.eq(Cat(stickybits, rs)) + ] + + def shift_up_multi(self, diff): + """ shifts a mantissa up. exponent is decreased to compensate + """ + sm = MultiShift(self.width) + mw = Const(self.m_width, len(diff)) + maxslen = Mux(diff > mw, mw, diff) + + return [self.e.eq(self.e - diff), + self.m.eq(sm.lshift(self.m, maxslen)) + ] + + +class FPNumDecode(FPNumBase): + """ Floating-point Number Class + + Contains signals for an incoming copy of the value, decoded into + sign / exponent / mantissa. + Also contains encoding functions, creation and recognition of + zero, NaN and inf (all signed) + + Four extra bits are included in the mantissa: the top bit + (m[-1]) is effectively a carry-overflow. 
The other three are + guard (m[2]), round (m[1]), and sticky (m[0]) + """ + def __init__(self, op, width, m_extra=True): + FPNumBase.__init__(self, width, m_extra) + self.op = op + + def elaborate(self, platform): + m = FPNumBase.elaborate(self, platform) + + m.d.comb += self.decode(self.v) + + return m + + def decode(self, v): + """ decodes a latched value into sign / exponent / mantissa + + bias is subtracted here, from the exponent. exponent + is extended to 10 bits so that subtract 127 is done on + a 10-bit number + """ + args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros + #print ("decode", self.e_end) + return [self.m.eq(Cat(*args)), # mantissa + self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp + self.s.eq(v[-1]), # sign + ] + +class FPNumIn(FPNumBase): + """ Floating-point Number Class + + Contains signals for an incoming copy of the value, decoded into + sign / exponent / mantissa. + Also contains encoding functions, creation and recognition of + zero, NaN and inf (all signed) + + Four extra bits are included in the mantissa: the top bit + (m[-1]) is effectively a carry-overflow. The other three are + guard (m[2]), round (m[1]), and sticky (m[0]) + """ + def __init__(self, op, width, m_extra=True): + FPNumBase.__init__(self, width, m_extra) + self.latch_in = Signal() + self.op = op + + def decode2(self, m): + """ decodes a latched value into sign / exponent / mantissa + + bias is subtracted here, from the exponent. exponent + is extended to 10 bits so that subtract 127 is done on + a 10-bit number + """ + v = self.v + args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros + #print ("decode", self.e_end) + res = ObjectProxy(m, pipemode=False) + res.m = Cat(*args) # mantissa + res.e = v[self.e_start:self.e_end] - self.P127 # exp + res.s = v[-1] # sign + return res + + def decode(self, v): + """ decodes a latched value into sign / exponent / mantissa + + bias is subtracted here, from the exponent. 
exponent + is extended to 10 bits so that subtract 127 is done on + a 10-bit number + """ + args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros + #print ("decode", self.e_end) + return [self.m.eq(Cat(*args)), # mantissa + self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp + self.s.eq(v[-1]), # sign + ] + + def shift_down(self, inp): + """ shifts a mantissa down by one. exponent is increased to compensate + + accuracy is lost as a result in the mantissa however there are 3 + guard bits (the latter of which is the "sticky" bit) + """ + return [self.e.eq(inp.e + 1), + self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0)) + ] + + def shift_down_multi(self, diff, inp=None): + """ shifts a mantissa down. exponent is increased to compensate + + accuracy is lost as a result in the mantissa however there are 3 + guard bits (the latter of which is the "sticky" bit) + + this code works by variable-shifting the mantissa by up to + its maximum bit-length: no point doing more (it'll still be + zero). + + the sticky bit is computed by shifting a batch of 1s by + the same amount, which will introduce zeros. it's then + inverted and used as a mask to get the LSBs of the mantissa. + those are then |'d into the sticky bit. + """ + if inp is None: + inp = self + sm = MultiShift(self.width) + mw = Const(self.m_width-1, len(diff)) + maxslen = Mux(diff > mw, mw, diff) + rs = sm.rshift(inp.m[1:], maxslen) + maxsleni = mw - maxslen + m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert + + #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0] + stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0] + return [self.e.eq(inp.e + diff), + self.m.eq(Cat(stickybit, rs)) + ] + + def shift_up_multi(self, diff): + """ shifts a mantissa up. 
exponent is decreased to compensate
        """
        sm = MultiShift(self.width)
        mw = Const(self.m_width, len(diff))
        # clamp shift to mantissa width -- no point shifting further
        maxslen = Mux(diff > mw, mw, diff)

        return [self.e.eq(self.e - diff),
                self.m.eq(sm.lshift(self.m, maxslen))
               ]

class Trigger(Elaboratable):
    """ stb/ack handshake pair: `trigger` is high in any cycle where
        both stb and ack are asserted simultaneously.
    """
    def __init__(self):

        self.stb = Signal(reset=0)               # strobe (request)
        self.ack = Signal()                      # acknowledge
        self.trigger = Signal(reset_less=True)   # stb & ack (transaction)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.trigger.eq(self.stb & self.ack)
        return m

    def eq(self, inp):
        # copy the handshake pair from another Trigger
        return [self.stb.eq(inp.stb),
                self.ack.eq(inp.ack)
               ]

    def ports(self):
        return [self.stb, self.ack]


class FPOpIn(PrevControl):
    """ input operand: PrevControl handshake with the data signal
        exposed under the traditional name `v`.
    """
    def __init__(self, width):
        PrevControl.__init__(self)
        self.width = width

    @property
    def v(self):
        return self.data_i

    def chain_inv(self, in_op, extra=None):
        """ connect from in_op with the ACK *inverted*.
            NOTE(review): assumes PrevControl provides stb/ack -- confirm
            against singlepipe's PrevControl (valid/ready naming).
        """
        stb = in_op.stb
        if extra is not None:
            stb = stb & extra
        return [self.v.eq(in_op.v),          # receive value
                self.stb.eq(stb),            # receive STB
                in_op.ack.eq(~self.ack),     # send ACK
               ]

    def chain_from(self, in_op, extra=None):
        """ connect from in_op, optionally gating STB with `extra` """
        stb = in_op.stb
        if extra is not None:
            stb = stb & extra
        return [self.v.eq(in_op.v),          # receive value
                self.stb.eq(stb),            # receive STB
                in_op.ack.eq(self.ack),      # send ACK
               ]


class FPOpOut(NextControl):
    """ output operand: NextControl handshake with the data signal
        exposed under the traditional name `v`.
    """
    def __init__(self, width):
        NextControl.__init__(self)
        self.width = width

    @property
    def v(self):
        return self.data_o

    def chain_inv(self, in_op, extra=None):
        """ connect from in_op with the ACK *inverted*.
            NOTE(review): assumes NextControl provides stb/ack -- confirm.
        """
        stb = in_op.stb
        if extra is not None:
            stb = stb & extra
        return [self.v.eq(in_op.v),          # receive value
                self.stb.eq(stb),            # receive STB
                in_op.ack.eq(~self.ack),     # send ACK
               ]

    def chain_from(self, in_op, extra=None):
        """ connect from in_op, optionally gating STB with `extra` """
        stb = in_op.stb
        if extra is not None:
            stb = stb & extra
        return [self.v.eq(in_op.v),          # receive value
                self.stb.eq(stb),            # receive STB
                in_op.ack.eq(self.ack),      # send ACK
               ]


class Overflow: #(Elaboratable):
    def __init__(self):
        self.guard = Signal(reset_less=True)     # tot[2]
        self.round_bit = 
Signal(reset_less=True) # tot[1] + self.sticky = Signal(reset_less=True) # tot[0] + self.m0 = Signal(reset_less=True) # mantissa zero bit + + self.roundz = Signal(reset_less=True) + + def __iter__(self): + yield self.guard + yield self.round_bit + yield self.sticky + yield self.m0 + + def eq(self, inp): + return [self.guard.eq(inp.guard), + self.round_bit.eq(inp.round_bit), + self.sticky.eq(inp.sticky), + self.m0.eq(inp.m0)] + + def elaborate(self, platform): + m = Module() + m.d.comb += self.roundz.eq(self.guard & \ + (self.round_bit | self.sticky | self.m0)) + return m + + +class FPBase: + """ IEEE754 Floating Point Base Class + + contains common functions for FP manipulation, such as + extracting and packing operands, normalisation, denormalisation, + rounding etc. + """ + + def get_op(self, m, op, v, next_state): + """ this function moves to the next state and copies the operand + when both stb and ack are 1. + acknowledgement is sent by setting ack to ZERO. + """ + res = v.decode2(m) + ack = Signal() + with m.If((op.ready_o) & (op.valid_i_test)): + m.next = next_state + # op is latched in from FPNumIn class on same ack/stb + m.d.comb += ack.eq(0) + with m.Else(): + m.d.comb += ack.eq(1) + return [res, ack] + + def denormalise(self, m, a): + """ denormalises a number. this is probably the wrong name for + this function. for normalised numbers (exponent != minimum) + one *extra* bit (the implicit 1) is added *back in*. + for denormalised numbers, the mantissa is left alone + and the exponent increased by 1. + + both cases *effectively multiply the number stored by 2*, + which has to be taken into account when extracting the result. 
+ """ + with m.If(a.exp_n127): + m.d.sync += a.e.eq(a.N126) # limit a exponent + with m.Else(): + m.d.sync += a.m[-1].eq(1) # set top mantissa bit + + def op_normalise(self, m, op, next_state): + """ operand normalisation + NOTE: just like "align", this one keeps going round every clock + until the result's exponent is within acceptable "range" + """ + with m.If((op.m[-1] == 0)): # check last bit of mantissa + m.d.sync +=[ + op.e.eq(op.e - 1), # DECREASE exponent + op.m.eq(op.m << 1), # shift mantissa UP + ] + with m.Else(): + m.next = next_state + + def normalise_1(self, m, z, of, next_state): + """ first stage normalisation + + NOTE: just like "align", this one keeps going round every clock + until the result's exponent is within acceptable "range" + NOTE: the weirdness of reassigning guard and round is due to + the extra mantissa bits coming from tot[0..2] + """ + with m.If((z.m[-1] == 0) & (z.e > z.N126)): + m.d.sync += [ + z.e.eq(z.e - 1), # DECREASE exponent + z.m.eq(z.m << 1), # shift mantissa UP + z.m[0].eq(of.guard), # steal guard bit (was tot[2]) + of.guard.eq(of.round_bit), # steal round_bit (was tot[1]) + of.round_bit.eq(0), # reset round bit + of.m0.eq(of.guard), + ] + with m.Else(): + m.next = next_state + + def normalise_2(self, m, z, of, next_state): + """ second stage normalisation + + NOTE: just like "align", this one keeps going round every clock + until the result's exponent is within acceptable "range" + NOTE: the weirdness of reassigning guard and round is due to + the extra mantissa bits coming from tot[0..2] + """ + with m.If(z.e < z.N126): + m.d.sync +=[ + z.e.eq(z.e + 1), # INCREASE exponent + z.m.eq(z.m >> 1), # shift mantissa DOWN + of.guard.eq(z.m[0]), + of.m0.eq(z.m[1]), + of.round_bit.eq(of.guard), + of.sticky.eq(of.sticky | of.round_bit) + ] + with m.Else(): + m.next = next_state + + def roundz(self, m, z, roundz): + """ performs rounding on the output. 
TODO: different kinds of rounding + """ + with m.If(roundz): + m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up + with m.If(z.m == z.m1s): # all 1s + m.d.sync += z.e.eq(z.e + 1) # exponent rounds up + + def corrections(self, m, z, next_state): + """ denormalisation and sign-bug corrections + """ + m.next = next_state + # denormalised, correct exponent to zero + with m.If(z.is_denormalised): + m.d.sync += z.e.eq(z.N127) + + def pack(self, m, z, next_state): + """ packs the result into the output (detects overflow->Inf) + """ + m.next = next_state + # if overflow occurs, return inf + with m.If(z.is_overflowed): + m.d.sync += z.inf(z.s) + with m.Else(): + m.d.sync += z.create(z.s, z.e, z.m) + + def put_z(self, m, z, out_z, next_state): + """ put_z: stores the result in the output. raises stb and waits + for ack to be set to 1 before moving to the next state. + resets stb back to zero when that occurs, as acknowledgement. + """ + m.d.sync += [ + out_z.v.eq(z.v) + ] + with m.If(out_z.valid_o & out_z.ready_i_test): + m.d.sync += out_z.valid_o.eq(0) + m.next = next_state + with m.Else(): + m.d.sync += out_z.valid_o.eq(1) + + +class FPState(FPBase): + def __init__(self, state_from): + self.state_from = state_from + + def set_inputs(self, inputs): + self.inputs = inputs + for k,v in inputs.items(): + setattr(self, k, v) + + def set_outputs(self, outputs): + self.outputs = outputs + for k,v in outputs.items(): + setattr(self, k, v) + + +class FPID: + def __init__(self, id_wid): + self.id_wid = id_wid + if self.id_wid: + self.in_mid = Signal(id_wid, reset_less=True) + self.out_mid = Signal(id_wid, reset_less=True) + else: + self.in_mid = None + self.out_mid = None + + def idsync(self, m): + if self.id_wid is not None: + m.d.sync += self.out_mid.eq(self.in_mid) + + diff --git a/src/ieee754/add/fpcommon/__init__.py b/src/ieee754/add/fpcommon/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ieee754/add/fpcommon/corrections.py 
# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
# 2013-12-12

from nmigen import Module, Elaboratable
from nmigen.cli import main, verilog
from fpbase import FPState
from fpcommon.roundz import FPRoundData


class FPCorrectionsMod(Elaboratable):
    """ combinatorial post-round correction: if the (non-bypassed) result
        is denormalised, clamp its exponent to N127.  everything else is
        passed straight through (mid, z, out_do_z, oz).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):       # only when not bypassed
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m


class FPCorrections(FPState):
    """ FSM wrapper around FPCorrectionsMod: latches the corrected
        result and proceeds to the "pack" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # bugfix: FPCorrectionsMod requires id_wid (was FPCorrectionsMod(width))
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # bugfix: the Mod's output is out_z, not o (was self.mod.o.mid)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
# 2013-12-12

from nmigen import Module, Signal, Cat, Mux, Array, Const, Elaboratable
from nmigen.lib.coding import PriorityEncoder
from nmigen.cli import main, verilog
from math import log

from fpbase import FPNumIn, FPNumOut, FPNumBase, FPOpIn
from fpbase import Overflow, FPBase, MultiShiftRMerge, Trigger
from fpbase import FPState
from singlepipe import (ControlBase, StageChain, SimpleHandshake,
                        PassThroughStage, PrevControl)
from multipipe import CombMuxOutPipe
from multipipe import PriorityCombMuxInPipe
import nmoperator


class FPSCData:
    """ data record between special-cases and denormalisation:
        operands a/b, bypass result z/oz/out_do_z, and muxid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        yield from self.a
        yield from self.b
        yield from self.z
        yield self.oz
        yield self.out_do_z
        yield self.mid

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]


class FPAddDeNormMod(FPState):
    """ combinatorial denormalisation of both operands (a and b):
        exponent clamped to N126 for denormals, otherwise the implicit
        top mantissa bit is restored.  bypass signals pass through.

        NOTE(review): subclasses FPState but never calls FPState.__init__;
        it is used purely as a pipeline stage -- confirm whether
        Elaboratable would be the more appropriate base class.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # XXX hmmm, don't like repeating identical code
            m.d.comb += self.o.a.eq(self.i.a)
            with m.If(self.i.a.exp_n127):
                m.d.comb += self.o.a.e.eq(self.i.a.N126)  # limit a exponent
            with m.Else():
                m.d.comb += self.o.a.m[-1].eq(1)  # set top mantissa bit

            m.d.comb += self.o.b.eq(self.i.b)
            with m.If(self.i.b.exp_n127):
                m.d.comb += self.o.b.e.eq(self.i.b.N126)  # limit b exponent
            with m.Else():
                m.d.comb += self.o.b.m[-1].eq(1)  # set top mantissa bit

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m


class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: latches denormalised a/b
        and proceeds to the "align" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # bugfix: FPAddDeNormMod requires id_wid (was FPAddDeNormMod(width))
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # bugfix: the Mod exposes its outputs as o.a / o.b,
        # not out_a / out_b (which do not exist on it)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"


class FPGetOpMod(Elaboratable):
    """ single-operand receiver: latches in_op.v into out_op when the
        handshake (ready_o & valid_i_test) fires; out_decode flags it.
    """

    def __init__(self, width):
        self.in_op = FPOpIn(width)
        self.in_op.data_i = Signal(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_decode.eq((self.in_op.ready_o) & \
                                       (self.in_op.valid_i_test))
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            m.d.comb += [
                self.out_op.eq(self.in_op.v),
            ]
        return m


class FPGetOp(FPState):
    """ gets one operand, latching it and de-asserting ready_o as the
        acknowledgement, then moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += nmoperator.eq(self.mod.in_op, in_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.in_op.ready_o.eq(0),
                self.out_op.eq(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ready_o.eq(1)


class FPNumBase2Ops:
    """ a pair of FPNumBase operands plus muxid """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]

    def ports(self):
        return [self.a, self.b, self.mid]


class FPADDBaseData:
    """ raw (un-decoded) input record for the adder: a, b, muxid """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]

    def ports(self):
        return [self.a, self.b, self.mid]


class FPGet2OpMod(PrevControl):
    """ two-operand receiver: copies data_i to o when the PrevControl
        trigger fires.
    """

    def __init__(self, width, id_wid):
        PrevControl.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.data_i = self.ispec()
        self.i = self.data_i
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = PrevControl.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += [
                self.o.eq(self.data_i),
            ]
        return m
class FPGet2Op(FPState):
    """ gets both operands via FPGet2OpMod, latching them and
        de-asserting ready_o as the acknowledgement.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def trigger_setup(self, m, in_stb, in_ack):
        """ links stb/ack
        """
        m.d.comb += self.mod.valid_i.eq(in_stb)
        m.d.comb += in_ack.eq(self.mod.ready_o)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.get_ops = self.mod
        m.d.comb += self.mod.i.eq(i)
        m.d.comb += self.out_ack.eq(self.mod.ready_o)
        m.d.comb += self.out_decode.eq(self.mod.trigger)

    def process(self, i):
        return self.o

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.mod.ready_o.eq(0),
                self.o.eq(self.mod.o),
            ]
        with m.Else():
            m.d.sync += self.mod.ready_o.eq(1)


class FPNormToPack(FPState, SimpleHandshake):
    """ chains normalisation, rounding, corrections and packing into a
        single pipeline stage (FPAddStage1Data in, FPPackData out).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        SimpleHandshake.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid)  # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        stages = [nmod, rmod, cmod, pmod]
        chain = StageChain(stages)
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        self.o = pmod.o

    def process(self, i):
        return self.o

    def action(self, m):
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"


class FPPackData(Object):
    """ packed result record: final bit-pattern z plus muxid """

    def __init__(self, width, id_wid):
        Object.__init__(self)
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)


class FPPackMod(Elaboratable):
    """ combinatorial packer: creates the final FP bit-pattern from
        sign/exponent/mantissa (detecting overflow -> Inf), or passes
        the bypass value oz through when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_overflowed):
                m.d.comb += z.inf(self.i.z.s)          # overflow -> Inf
            with m.Else():
                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        with m.Else():
            m.d.comb += z.v.eq(self.i.oz)              # bypass value
        m.d.comb += self.o.z.eq(z.v)
        return m


class FPPack(FPState):
    """ FSM wrapper around FPPackMod: latches the packed result and
        proceeds to the "pack_put_z" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # bugfix: FPPackMod requires id_wid (was FPPackMod(width))
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # bugfix: FPPackData has .z/.mid (not .v) and the Mod's output
        # is .o (not .out_z): was out_z.v.eq(mod.out_z.v) / mod.o.mid
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"


class FPAddStage1Data:
    """ post-add data record: result z, bypass (oz/out_do_z),
        rounding bits (of) and muxid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        yield from self.z
        yield self.out_do_z
        yield self.oz
        yield from self.of
        yield self.mid

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.of.eq(i.of), self.mid.eq(i.mid)]
# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
# 2013-12-12
#
# post-normalisation, pre-normalisation and put-z modules.
# NOTE(review): the original prenormalise.py import line ended with a
# stray trailing comma ("from nmigen import Module, Signal, Cat,") --
# a syntax error; corrected here.

from nmigen import Module, Signal, Cat, Mux, Elaboratable
from nmigen.lib.coding import PriorityEncoder
from nmigen.cli import main, verilog
from math import log

from fpbase import Overflow, FPNumBase
from fpbase import MultiShiftRMerge
from fpbase import FPState
from .postcalc import FPAddStage1Data


class FPNorm1Data:
    """ post-normalisation record: result z, rounding decision roundz,
        bypass (oz/out_do_z) and muxid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.roundz.eq(i.roundz), self.mid.eq(i.mid)]


class FPNorm1ModSingle(Elaboratable):
    """ single-cycle normalisation: a priority-encoder counts leading
        zeros for the "decrease exponent" case, and a MultiShiftRMerge
        handles the "increase exponent" (shift-down, sticky-merging)
        case.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # bugfix: these four previously read temp_s, which is
                    # the *decrease* branch's shifter output (leaked via
                    # python scoping); the shifted bits live in msr.m here
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too (merge sticky)
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m


class FPNorm1ModMulti:
    """ multi-cycle (one bit per clock) normalisation.  in_select picks
        between the fresh input (in_z/in_of) and the loop-back values
        (temp_z/temp_of); out_norm is raised while more normalisation
        is needed.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # bugfix: out_norm was driven in elaborate but never created here
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),    # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),   # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),    # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),    # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),   # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m


class FPNorm1Single(FPState):
    """ FSM wrapper around the single-cycle normaliser """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # bugfix: FPNorm1ModSingle requires id_wid (was ...Single(width))
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"


class FPNorm1Multi(FPState):
    """ FSM wrapper around the multi-cycle normaliser: loops in this
        state until out_norm drops, then latches roundz and moves on.

        NOTE(review): this calls self.mod.setup(...) but FPNorm1ModMulti
        does not define a setup method -- confirm intended wiring.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0)  # sets to zero when not in normalise_1 state

    def action(self, m):
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)


class FPNormaliseModSingle:
    """ pre-normalisation: shift the mantissa up (decreasing the
        exponent) until its MSB is set, using a clz via PriorityEncoder.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # bugfix: input attribute is in_z (self.i does not exist here)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        # NOTE(review): the original also copied self.in_of and drove
        # self.out_of, but neither attribute exists on this class
        # (AttributeError); in_of is left at its zero default here.
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m


class FPPutZ(FPState):
    """ stores the result in the output, raising valid_o and waiting
        for ready to be acknowledged before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z)
        ]
        with m.If(self.out_z.z.valid_o & self.out_z.z.ready_i_test):
            m.d.sync += self.out_z.z.valid_o.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.valid_o.eq(1)
m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].valid_o), + outz_ack.eq(self.out_zs[self.in_mid].ready_i_test), + ] + m.d.sync += [ + self.out_zs[self.in_mid].v.eq(self.in_z.v) + ] + with m.If(outz_stb & outz_ack): + m.d.sync += self.out_zs[self.in_mid].valid_o.eq(0) + m.next = self.to_state + with m.Else(): + m.d.sync += self.out_zs[self.in_mid].valid_o.eq(1) + diff --git a/src/ieee754/add/fpcommon/roundz.py b/src/ieee754/add/fpcommon/roundz.py new file mode 100644 index 00000000..420d6669 --- /dev/null +++ b/src/ieee754/add/fpcommon/roundz.py @@ -0,0 +1,82 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Elaboratable +from nmigen.cli import main, verilog + +from fpbase import FPNumBase +from fpbase import FPState +from fpcommon.postnormalise import FPNorm1Data + + +class FPRoundData: + + def __init__(self, width, id_wid): + self.z = FPNumBase(width, False) + self.out_do_z = Signal(reset_less=True) + self.oz = Signal(width, reset_less=True) + self.mid = Signal(id_wid, reset_less=True) + + def eq(self, i): + return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz), + self.mid.eq(i.mid)] + + +class FPRoundMod(Elaboratable): + + def __init__(self, width, id_wid): + self.width = width + self.id_wid = id_wid + self.i = self.ispec() + self.out_z = self.ospec() + + def ispec(self): + return FPNorm1Data(self.width, self.id_wid) + + def ospec(self): + return FPRoundData(self.width, self.id_wid) + + def process(self, i): + return self.out_z + + def setup(self, m, i): + m.submodules.roundz = self + m.d.comb += self.i.eq(i) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z + with m.If(~self.i.out_do_z): + with m.If(self.i.roundz): + m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up + with m.If(self.i.z.m == self.i.z.m1s): # all 1s + m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up + + 
return m + + +class FPRound(FPState): + + def __init__(self, width, id_wid): + FPState.__init__(self, "round") + self.mod = FPRoundMod(width, id_wid) + self.out_z = self.ospec() + + def ispec(self): + return self.mod.ispec() + + def ospec(self): + return self.mod.ospec() + + def setup(self, m, i): + """ links module to inputs and outputs + """ + self.mod.setup(m, i) + + self.idsync(m) + m.d.sync += self.out_z.eq(self.mod.out_z) + m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid) + + def action(self, m): + m.next = "corrections" diff --git a/src/ieee754/add/fsqrt.py b/src/ieee754/add/fsqrt.py new file mode 100644 index 00000000..02449b0f --- /dev/null +++ b/src/ieee754/add/fsqrt.py @@ -0,0 +1,256 @@ +from sfpy import Float32 + + +# XXX DO NOT USE, fails on num=65536. wark-wark... +def sqrtsimple(num): + res = 0 + bit = 1 + + while (bit < num): + bit <<= 2 + + while (bit != 0): + if (num >= res + bit): + num -= res + bit + res = (res >> 1) + bit + else: + res >>= 1 + bit >>= 2 + + return res + + +def sqrt(num): + D = num # D is input (from num) + Q = 0 # quotient + R = 0 # remainder + for i in range(64, -1, -1): # negative ranges are weird...
+ + R = (R<<2)|((D>>(i+i))&3) + + if R >= 0: + R -= ((Q<<2)|1) # -Q01 + else: + R += ((Q<<2)|3) # +Q11 + + Q <<= 1 + if R >= 0: + Q |= 1 # new Q + + if R < 0: + R = R + ((Q<<1)|1) + + return Q, R + + +# grabbed these from unit_test_single (convenience, this is just experimenting) + +def get_mantissa(x): + return 0x7fffff & x + +def get_exponent(x): + return ((x & 0x7f800000) >> 23) - 127 + +def set_exponent(x, e): + return (x & ~0x7f800000) | ((e+127) << 23) + +def get_sign(x): + return ((x & 0x80000000) >> 31) + +# convert s/e/m to FP32 +def create_fp32(s, e, m): + """ receive sign, exponent, mantissa, return FP32 """ + return set_exponent((s << 31) | get_mantissa(m), e) + +# convert FP32 to s/e/m +def decode_fp32(x): + """ receive FP32, return sign, exponent, mantissa """ + return get_sign(x), get_exponent(x), get_mantissa(x) + + +# main function, takes mantissa and exponent as separate arguments +# returns a tuple, sqrt'd mantissa, sqrt'd exponent + +def main(mantissa, exponent): + if exponent & 1 != 0: + # shift mantissa up, subtract 1 from exp to compensate + mantissa <<= 1 + exponent -= 1 + m, r = sqrt(mantissa) + return m, r, exponent >> 1 + + +#normalization function +def normalise(s, m, e, lowbits): + if (lowbits >= 2): + m += 1 + if get_mantissa(m) == ((1<<24)-1): + e += 1 + return s, m, e + + +def fsqrt_test(x): + + xbits = x.bits + print ("x", x, type(x)) + sq_test = x.sqrt() + print ("sqrt", sq_test) + + print (xbits, type(xbits)) + s, e, m = decode_fp32(xbits) + print("x decode", s, e, m, hex(m)) + + m |= 1<<23 # set top bit (the missing "1" from mantissa) + m <<= 27 + + sm, sr, se = main(m, e) + lowbits = sm & 0x3 + sm >>= 2 + sm = get_mantissa(sm) + #sm += 2 + + s, sm, se = normalise(s, sm, se, lowbits) + + print("our sqrt", s, se, sm, hex(sm), bin(sm), "lowbits", lowbits, + "rem", hex(sr)) + if lowbits >= 2: + print ("probably needs rounding (+1 on mantissa)") + + sq_xbits = sq_test.bits + s, e, m = decode_fp32(sq_xbits) + print ("sf32 sqrt", s, e, m,
hex(m), bin(m)) + print () + +if __name__ == '__main__': + + # quick test up to 1000 of two sqrt functions + for Q in range(1, int(1e4)): + print(Q, sqrt(Q), sqrtsimple(Q), int(Q**0.5)) + assert int(Q**0.5) == sqrtsimple(Q), "Q sqrtsimpl fail %d" % Q + assert int(Q**0.5) == sqrt(Q)[0], "Q sqrt fail %d" % Q + + # quick mantissa/exponent demo + for e in range(26): + for m in range(26): + ms, mr, es = main(m, e) + print("m:%d e:%d sqrt: m:%d-%d e:%d" % (m, e, ms, mr, es)) + + x = Float32(1234.123456789) + fsqrt_test(x) + x = Float32(32.1) + fsqrt_test(x) + x = Float32(16.0) + fsqrt_test(x) + x = Float32(8.0) + fsqrt_test(x) + x = Float32(8.5) + fsqrt_test(x) + x = Float32(3.14159265358979323) + fsqrt_test(x) + x = Float32(12.99392923123123) + fsqrt_test(x) + x = Float32(0.123456) + fsqrt_test(x) + + + + +""" + +Notes: +https://pdfs.semanticscholar.org/5060/4e9aff0e37089c4ab9a376c3f35761ffe28b.pdf + +//This is the main code of integer sqrt function found here:http://verilogcodes.blogspot.com/2017/11/a-verilog-function-for-finding-square-root.html +// + +module testbench; + +reg [15:0] sqr; + +//Verilog function to find square root of a 32 bit number. +//The output is 16 bit. +function [15:0] sqrt; + input [31:0] num; //declare input + //intermediate signals. + reg [31:0] a; + reg [15:0] q; + reg [17:0] left,right,r; + integer i; +begin + //initialize all the variables. + a = num; + q = 0; + i = 0; + left = 0; //input to adder/sub + right = 0; //input to adder/sub + r = 0; //remainder + //run the calculations for 16 iterations. + for(i=0;i<16;i=i+1) begin + right = {q,r[17],1'b1}; + left = {r[15:0],a[31:30]}; + a = {a[29:0],2'b00}; //left shift by 2 bits. + if (r[17] == 1) //add if r is negative + r = left + right; + else //subtract if r is positive + r = left - right; + q = {q[14:0],!r[17]}; + end + sqrt = q; //final assignment of output. 
+end +endfunction //end of Function + + +c version (from paper linked from URL) + +unsigned squart(D, r) /*Non-Restoring sqrt*/ + unsigned D; /*D:32-bit unsigned integer to be square rooted */ + int *r; +{ + unsigned Q = 0; /*Q:16-bit unsigned integer (root)*/ + int R = 0; /*R:17-bit integer (remainder)*/ + int i; + for (i = 15;i>=0;i--) /*for each root bit*/ + { + if (R>=0) + { /*new remainder:*/ + R = R<<2)|((D>>(i+i))&3); + R = R-((Q<<2)|1); /*-Q01*/ + } + else + { /*new remainder:*/ + R = R<<2)|((D>>(i+i))&3); + R = R+((Q<<2)|3); /*+Q11*/ + } + if (R>=0) Q = Q<<1)|1; /*new Q:*/ + else Q = Q<<1)|0; /*new Q:*/ + } + + /*remainder adjusting*/ + if (R<0) R = R+((Q<<1)|1); + *r = R; /*return remainder*/ + return(Q); /*return root*/ +} + +From wikipedia page: + +short isqrt(short num) { + short res = 0; + short bit = 1 << 14; // The second-to-top bit is set: 1 << 30 for 32 bits + + // "bit" starts at the highest power of four <= the argument. + while (bit > num) + bit >>= 2; + + while (bit != 0) { + if (num >= res + bit) { + num -= res + bit; + res = (res >> 1) + bit; + } + else + res >>= 1; + bit >>= 2; + } + return res; +} + +""" diff --git a/src/ieee754/add/function_unit.py b/src/ieee754/add/function_unit.py new file mode 100644 index 00000000..108c84f3 --- /dev/null +++ b/src/ieee754/add/function_unit.py @@ -0,0 +1,44 @@ +from nmigen import Signal, Cat, Const, Mux, Module, Array +from nmigen.cli import main, verilog + +from nmigen_add_experiment import FPADD +from rstation_row import ReservationStationRow + +from math import log + +class FunctionUnit: + + def __init__(self, width, num_units): + """ Function Unit + + * width: bit-width of IEEE754. 
supported: 16, 32, 64 + * num_units: number of Reservation Stations + """ + self.width = width + + fus = [] + bsz = int(log(width) / log(2)) + for i in range(num_units): + mid = Const(i, bsz) + rs = ReservationStationRow(width, mid) + rs.name = "RS%d" % i + fus.append(rs) + self.fus = Array(fus) + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for ReservationStationRow + """ + m = Module() + + return m + + +if __name__ == "__main__": + rs = ReservationStationRow(width=32, id_wid=Const(1,4)) + main(rs, ports=[rs.in_a, rs.in_b, rs.out_z]) + + # works... but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/add/inputgroup.py b/src/ieee754/add/inputgroup.py new file mode 100644 index 00000000..e1b775d4 --- /dev/null +++ b/src/ieee754/add/inputgroup.py @@ -0,0 +1,115 @@ +from nmigen import Module, Signal, Cat, Array, Const +from nmigen.lib.coding import PriorityEncoder +from math import log + +from fpbase import Trigger + + +class FPGetSyncOpsMod: + def __init__(self, width, num_ops=2): + self.width = width + self.num_ops = num_ops + inops = [] + outops = [] + for i in range(num_ops): + inops.append(Signal(width, reset_less=True)) + outops.append(Signal(width, reset_less=True)) + self.in_op = inops + self.out_op = outops + self.stb = Signal(num_ops) + self.ack = Signal() + self.ready = Signal(reset_less=True) + self.out_decode = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False))) + m.d.comb += self.out_decode.eq(self.ack & self.ready) + with m.If(self.out_decode): + for i in range(self.num_ops): + m.d.comb += [ + self.out_op[i].eq(self.in_op[i]), + ] + return m + + def ports(self): + return self.in_op + self.out_op + [self.stb, self.ack] + + +class FPOps(Trigger): + def __init__(self, width, num_ops):
Trigger.__init__(self) + self.width = width + self.num_ops = num_ops + + res = [] + for i in range(num_ops): + res.append(Signal(width)) + self.v = Array(res) + + def ports(self): + res = [] + for i in range(self.num_ops): + res.append(self.v[i]) + res.append(self.ack) + res.append(self.stb) + return res + + +class InputGroup: + def __init__(self, width, num_ops=2, num_rows=4): + self.width = width + self.num_ops = num_ops + self.num_rows = num_rows + self.mmax = int(log(self.num_rows) / log(2)) + self.rs = [] + self.mid = Signal(self.mmax, reset_less=True) # multiplex id + for i in range(num_rows): + self.rs.append(FPGetSyncOpsMod(width, num_ops)) + self.rs = Array(self.rs) + + self.out_op = FPOps(width, num_ops) + + def elaborate(self, platform): + m = Module() + + pe = PriorityEncoder(self.num_rows) + m.submodules.selector = pe + m.submodules.out_op = self.out_op + m.submodules += self.rs + + # connect priority encoder + in_ready = [] + for i in range(self.num_rows): + in_ready.append(self.rs[i].ready) + m.d.comb += pe.i.eq(Cat(*in_ready)) + + active = Signal(reset_less=True) + out_en = Signal(reset_less=True) + m.d.comb += active.eq(~pe.n) # encoder active + m.d.comb += out_en.eq(active & self.out_op.trigger) + + # encoder active: ack relevant input, record MID, pass output + with m.If(out_en): + rs = self.rs[pe.o] + m.d.sync += self.mid.eq(pe.o) + m.d.sync += rs.ack.eq(0) + m.d.sync += self.out_op.stb.eq(0) + for j in range(self.num_ops): + m.d.sync += self.out_op.v[j].eq(rs.out_op[j]) + with m.Else(): + m.d.sync += self.out_op.stb.eq(1) + # acks all default to zero + for i in range(self.num_rows): + m.d.sync += self.rs[i].ack.eq(1) + + return m + + def ports(self): + res = [] + for i in range(self.num_rows): + inop = self.rs[i] + res += inop.in_op + [inop.stb] + return self.out_op.ports() + res + [self.mid] + + diff --git a/src/ieee754/add/iocontrol.py b/src/ieee754/add/iocontrol.py new file mode 100644 index 00000000..3d823c9b --- /dev/null +++ 
b/src/ieee754/add/iocontrol.py @@ -0,0 +1,306 @@ +""" IO Control API + + Associated development bugs: + * http://bugs.libre-riscv.org/show_bug.cgi?id=64 + * http://bugs.libre-riscv.org/show_bug.cgi?id=57 + + Stage API: + --------- + + stage requires compliance with a strict API that may be + implemented in several means, including as a static class. + + Stages do not HOLD data, and they definitely do not contain + signalling (ready/valid). They do however specify the FORMAT + of the incoming and outgoing data, and they provide a means to + PROCESS that data (from incoming format to outgoing format). + + Stage Blocks really must be combinatorial blocks. It would be ok + to have input come in from sync'd sources (clock-driven) however by + doing so they would no longer be deterministic, and chaining such + blocks with such side-effects together could result in unexpected, + unpredictable, unreproduceable behaviour. + So generally to be avoided, then unless you know what you are doing. + + the methods of a stage instance must be as follows: + + * ispec() - Input data format specification. Takes a bit of explaining. + The requirements are: something that eventually derives from + nmigen Value must be returned *OR* an iterator or iterable + or sequence (list, tuple etc.) or generator must *yield* + thing(s) that (eventually) derive from the nmigen Value class. + + Complex to state, very simple in practice: + see test_buf_pipe.py for over 25 worked examples. + + * ospec() - Output data format specification. + format requirements identical to ispec. + + * process(m, i) - Optional function for processing ispec-formatted data. + returns a combinatorial block of a result that + may be assigned to the output, by way of the "nmoperator.eq" + function. Note that what is returned here can be + extremely flexible. Even a dictionary can be returned + as long as it has fields that match precisely with the + Record into which its values is intended to be assigned. 
+ Again: see example unit tests for details. + + * setup(m, i) - Optional function for setting up submodules. + may be used for more complex stages, to link + the input (i) to submodules. must take responsibility + for adding those submodules to the module (m). + the submodules must be combinatorial blocks and + must have their inputs and output linked combinatorially. + + Both StageCls (for use with non-static classes) and Stage (for use + by static classes) are abstract classes from which, for convenience + and as a courtesy to other developers, anything conforming to the + Stage API may *choose* to derive. See Liskov Substitution Principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + StageChain: + ---------- + + A useful combinatorial wrapper around stages that chains them together + and then presents a Stage-API-conformant interface. By presenting + the same API as the stages it wraps, it can clearly be used recursively. + + ControlBase: + ----------- + + The base class for pipelines. Contains previous and next ready/valid/data. + Also has an extremely useful "connect" function that can be used to + connect a chain of pipelines and present the exact same prev/next + ready/valid/data API. + + Note: pipelines basically do not become pipelines as such until + handed to a derivative of ControlBase. ControlBase itself is *not* + strictly considered a pipeline class. Wishbone and AXI4 (master or + slave) could be derived from ControlBase, for example. 
+""" + +from nmigen import Signal, Cat, Const, Module, Value, Elaboratable +from nmigen.cli import verilog, rtlil +from nmigen.hdl.rec import Record + +from collections.abc import Sequence, Iterable +from collections import OrderedDict + +import nmoperator + + +class Object: + def __init__(self): + self.fields = OrderedDict() + + def __setattr__(self, k, v): + print ("kv", k, v) + if (k.startswith('_') or k in ["fields", "name", "src_loc"] or + k in dir(Object) or "fields" not in self.__dict__): + return object.__setattr__(self, k, v) + self.fields[k] = v + + def __getattr__(self, k): + if k in self.__dict__: + return object.__getattr__(self, k) + try: + return self.fields[k] + except KeyError as e: + raise AttributeError(e) + + def __iter__(self): + for x in self.fields.values(): # OrderedDict so order is preserved + if isinstance(x, Iterable): + yield from x + else: + yield x + + def eq(self, inp): + res = [] + for (k, o) in self.fields.items(): + i = getattr(inp, k) + print ("eq", o, i) + rres = o.eq(i) + if isinstance(rres, Sequence): + res += rres + else: + res.append(rres) + print (res) + return res + + def ports(self): # being called "keys" would be much better + return list(self) + + +class RecordObject(Record): + def __init__(self, layout=None, name=None): + Record.__init__(self, layout=layout or [], name=None) + + def __setattr__(self, k, v): + #print (dir(Record)) + if (k.startswith('_') or k in ["fields", "name", "src_loc"] or + k in dir(Record) or "fields" not in self.__dict__): + return object.__setattr__(self, k, v) + self.fields[k] = v + #print ("RecordObject setattr", k, v) + if isinstance(v, Record): + newlayout = {k: (k, v.layout)} + elif isinstance(v, Value): + newlayout = {k: (k, v.shape())} + else: + newlayout = {k: (k, nmoperator.shape(v))} + self.layout.fields.update(newlayout) + + def __iter__(self): + for x in self.fields.values(): # remember: fields is an OrderedDict + if isinstance(x, Iterable): + yield from x # a bit like flatten 
(nmigen.tools) + else: + yield x + + def ports(self): # would be better being called "keys" + return list(self) + + +class PrevControl(Elaboratable): + """ contains signals that come *from* the previous stage (both in and out) + * valid_i: previous stage indicating all incoming data is valid. + may be a multi-bit signal, where all bits are required + to be asserted to indicate "valid". + * ready_o: output to next stage indicating readiness to accept data + * data_i : an input - MUST be added by the USER of this class + """ + + def __init__(self, i_width=1, stage_ctl=False): + self.stage_ctl = stage_ctl + self.valid_i = Signal(i_width, name="p_valid_i") # prev >>in self + self._ready_o = Signal(name="p_ready_o") # prev < 1: + # multi-bit case: valid only when valid_i is all 1s + all1s = Const(-1, (len(self.valid_i), False)) + valid_i = (self.valid_i == all1s) + else: + # single-bit valid_i case + valid_i = self.valid_i + + # when stage indicates not ready, incoming data + # must "appear" to be not ready too + if self.stage_ctl: + valid_i = valid_i & self.s_ready_o + + return valid_i + + def elaborate(self, platform): + m = Module() + m.d.comb += self.trigger.eq(self.valid_i_test & self.ready_o) + return m + + def eq(self, i): + return [nmoperator.eq(self.data_i, i.data_i), + self.ready_o.eq(i.ready_o), + self.valid_i.eq(i.valid_i)] + + def __iter__(self): + yield self.valid_i + yield self.ready_o + if hasattr(self.data_i, "ports"): + yield from self.data_i.ports() + elif isinstance(self.data_i, Sequence): + yield from self.data_i + else: + yield self.data_i + + def ports(self): + return list(self) + + +class NextControl(Elaboratable): + """ contains the signals that go *to* the next stage (both in and out) + * valid_o: output indicating to next stage that data is valid + * ready_i: input from next stage indicating that it can accept data + * data_o : an output - MUST be added by the USER of this class + """ + def __init__(self, stage_ctl=False): + self.stage_ctl = 
stage_ctl + self.valid_o = Signal(name="n_valid_o") # self out>> next + self.ready_i = Signal(name="n_ready_i") # self < 1: + r_data = Array(r_data) + p_valid_i = Array(p_valid_i) + n_ready_in = Array(n_ready_in) + data_valid = Array(data_valid) + + nirn = Signal(reset_less=True) + m.d.comb += nirn.eq(~self.n.ready_i) + mid = self.p_mux.m_id + for i in range(p_len): + m.d.comb += data_valid[i].eq(0) + m.d.comb += n_ready_in[i].eq(1) + m.d.comb += p_valid_i[i].eq(0) + m.d.comb += self.p[i].ready_o.eq(0) + m.d.comb += p_valid_i[mid].eq(self.p_mux.active) + m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i) + m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid]) + anyvalid = Signal(i, reset_less=True) + av = [] + for i in range(p_len): + av.append(data_valid[i]) + anyvalid = Cat(*av) + m.d.comb += self.n.valid_o.eq(anyvalid.bool()) + m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \ + (n_ready_in[mid] & data_valid[mid])) + + for i in range(p_len): + vr = Signal(reset_less=True) + m.d.comb += vr.eq(self.p[i].valid_i & self.p[i].ready_o) + with m.If(vr): + m.d.comb += eq(r_data[i], self.p[i].data_i) + + m.d.comb += eq(self.n.data_o, self.process(r_data[mid])) + + return m + + +class CombMuxOutPipe(CombMultiOutPipeline): + def __init__(self, stage, n_len): + # HACK: stage is also the n-way multiplexer + CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage) + + # HACK: n-mux is also the stage... 
so set the muxid equal to input mid + stage.m_id = self.p.data_i.mid + + + +class InputPriorityArbiter(Elaboratable): + """ arbitration module for Input-Mux pipe, baed on PriorityEncoder + """ + def __init__(self, pipe, num_rows): + self.pipe = pipe + self.num_rows = num_rows + self.mmax = int(log(self.num_rows) / log(2)) + self.m_id = Signal(self.mmax, reset_less=True) # multiplex id + self.active = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + + assert len(self.pipe.p) == self.num_rows, \ + "must declare input to be same size" + pe = PriorityEncoder(self.num_rows) + m.submodules.selector = pe + + # connect priority encoder + in_ready = [] + for i in range(self.num_rows): + p_valid_i = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.pipe.p[i].valid_i_test) + in_ready.append(p_valid_i) + m.d.comb += pe.i.eq(Cat(*in_ready)) # array of input "valids" + m.d.comb += self.active.eq(~pe.n) # encoder active (one input valid) + m.d.comb += self.m_id.eq(pe.o) # output one active input + + return m + + def ports(self): + return [self.m_id, self.active] + + + +class PriorityCombMuxInPipe(CombMultiInPipeline): + """ an example of how to use the combinatorial pipeline. 
+ """ + + def __init__(self, stage, p_len=2): + p_mux = InputPriorityArbiter(self, p_len) + CombMultiInPipeline.__init__(self, stage, p_len, p_mux) + + +if __name__ == '__main__': + + dut = PriorityCombMuxInPipe(ExampleStage) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_combpipe.il", "w") as f: + f.write(vl) diff --git a/src/ieee754/add/nmigen_add_experiment.py b/src/ieee754/add/nmigen_add_experiment.py new file mode 100644 index 00000000..ecb1d35b --- /dev/null +++ b/src/ieee754/add/nmigen_add_experiment.py @@ -0,0 +1,28 @@ +# IEEE Floating Point Adder (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen.cli import main, verilog +from fpadd.statemachine import FPADDBase, FPADD +from fpadd.pipeline import FPADDMuxInOut + +if __name__ == "__main__": + if True: + alu = FPADD(width=32, id_wid=5, single_cycle=True) + main(alu, ports=alu.rs[0][0].ports() + \ + alu.rs[0][1].ports() + \ + alu.res[0].ports() + \ + [alu.ids.in_mid, alu.ids.out_mid]) + else: + alu = FPADDBase(width=32, id_wid=5, single_cycle=True) + main(alu, ports=[alu.in_a, alu.in_b] + \ + alu.in_t.ports() + \ + alu.out_z.ports() + \ + [alu.in_mid, alu.out_mid]) + + + # works... 
but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/add/nmigen_div_experiment.py b/src/ieee754/add/nmigen_div_experiment.py new file mode 100644 index 00000000..a7e215cb --- /dev/null +++ b/src/ieee754/add/nmigen_div_experiment.py @@ -0,0 +1,246 @@ +# IEEE Floating Point Divider (Single Precision) +# Copyright (C) Jonathan P Dawson 2013 +# 2013-12-12 + +from nmigen import Module, Signal, Const, Cat +from nmigen.cli import main, verilog + +from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, Overflow, FPBase, FPState +from singlepipe import eq + +class Div: + def __init__(self, width): + self.width = width + self.quot = Signal(width) # quotient + self.dor = Signal(width) # divisor + self.dend = Signal(width) # dividend + self.rem = Signal(width) # remainder + self.count = Signal(7) # loop count + + self.czero = Const(0, width) + + def reset(self, m): + m.d.sync += [ + self.quot.eq(self.czero), + self.rem.eq(self.czero), + self.count.eq(Const(0, 7)) + ] + + +class FPDIV(FPBase): + + def __init__(self, width): + FPBase.__init__(self) + self.width = width + + self.in_a = FPOpIn(width) + self.in_b = FPOpIn(width) + self.out_z = FPOpOut(width) + + self.states = [] + + def add_state(self, state): + self.states.append(state) + return state + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for FPDiv + """ + m = Module() + + # Latches + a = FPNumIn(None, self.width, False) + b = FPNumIn(None, self.width, False) + z = FPNumOut(self.width, False) + + div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky + + of = Overflow() + m.submodules.in_a = a + m.submodules.in_b = b + m.submodules.z = z + m.submodules.of = of + + m.d.comb += a.v.eq(self.in_a.v) + m.d.comb += b.v.eq(self.in_b.v) + + with m.FSM() as fsm: + + # ****** + # gets operand a + + with m.State("get_a"): + res = self.get_op(m, 
self.in_a, a, "get_b") + m.d.sync += eq([a, self.in_a.ready_o], res) + + # ****** + # gets operand b + + with m.State("get_b"): + res = self.get_op(m, self.in_b, b, "special_cases") + m.d.sync += eq([b, self.in_b.ready_o], res) + + # ****** + # special cases: NaNs, infs, zeros, denormalised + # NOTE: some of these are unique to div. see "Special Operations" + # https://steve.hollasch.net/cgindex/coding/ieeefloat.html + + with m.State("special_cases"): + + # if a is NaN or b is NaN return NaN + with m.If(a.is_nan | b.is_nan): + m.next = "put_z" + m.d.sync += z.nan(1) + + # if a is Inf and b is Inf return NaN + with m.Elif(a.is_inf & b.is_inf): + m.next = "put_z" + m.d.sync += z.nan(1) + + # if a is inf return inf (or NaN if b is zero) + with m.Elif(a.is_inf): + m.next = "put_z" + m.d.sync += z.inf(a.s ^ b.s) + + # if b is inf return zero + with m.Elif(b.is_inf): + m.next = "put_z" + m.d.sync += z.zero(a.s ^ b.s) + + # if a is zero return zero (or NaN if b is zero) + with m.Elif(a.is_zero): + m.next = "put_z" + # if b is zero return NaN + with m.If(b.is_zero): + m.d.sync += z.nan(1) + with m.Else(): + m.d.sync += z.zero(a.s ^ b.s) + + # if b is zero return Inf + with m.Elif(b.is_zero): + m.next = "put_z" + m.d.sync += z.inf(a.s ^ b.s) + + # Denormalised Number checks + with m.Else(): + m.next = "normalise_a" + self.denormalise(m, a) + self.denormalise(m, b) + + # ****** + # normalise_a + + with m.State("normalise_a"): + self.op_normalise(m, a, "normalise_b") + + # ****** + # normalise_b + + with m.State("normalise_b"): + self.op_normalise(m, b, "divide_0") + + # ****** + # First stage of divide. initialise state + + with m.State("divide_0"): + m.next = "divide_1" + m.d.sync += [ + z.s.eq(a.s ^ b.s), # sign + z.e.eq(a.e - b.e), # exponent + div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky + div.dor.eq(b.m), + ] + div.reset(m) + + # ****** + # Second stage of divide. 
+ + with m.State("divide_1"): + m.next = "divide_2" + m.d.sync += [ + div.quot.eq(div.quot << 1), + div.rem.eq(Cat(div.dend[-1], div.rem[0:])), + div.dend.eq(div.dend << 1), + ] + + # ****** + # Third stage of divide. + # This stage ends by jumping out to divide_3 + # However it defaults to jumping to divide_1 (which comes back here) + + with m.State("divide_2"): + with m.If(div.rem >= div.dor): + m.d.sync += [ + div.quot[0].eq(1), + div.rem.eq(div.rem - div.dor), + ] + with m.If(div.count == div.width-2): + m.next = "divide_3" + with m.Else(): + m.next = "divide_1" + m.d.sync += [ + div.count.eq(div.count + 1), + ] + + # ****** + # Fourth stage of divide. + + with m.State("divide_3"): + m.next = "normalise_1" + m.d.sync += [ + z.m.eq(div.quot[3:]), + of.guard.eq(div.quot[2]), + of.round_bit.eq(div.quot[1]), + of.sticky.eq(div.quot[0] | (div.rem != 0)) + ] + + # ****** + # First stage of normalisation. + + with m.State("normalise_1"): + self.normalise_1(m, z, of, "normalise_2") + + # ****** + # Second stage of normalisation. + + with m.State("normalise_2"): + self.normalise_2(m, z, of, "round") + + # ****** + # rounding stage + + with m.State("round"): + self.roundz(m, z, of.roundz) + m.next = "corrections" + + # ****** + # correction stage + + with m.State("corrections"): + self.corrections(m, z, "pack") + + # ****** + # pack stage + + with m.State("pack"): + self.pack(m, z, "put_z") + + # ****** + # put_z stage + + with m.State("put_z"): + self.put_z(m, z, self.out_z, "get_a") + + return m + + +if __name__ == "__main__": + alu = FPDIV(width=32) + main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) + + + # works... 
but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/add/nmoperator.py b/src/ieee754/add/nmoperator.py new file mode 100644 index 00000000..bd5e5544 --- /dev/null +++ b/src/ieee754/add/nmoperator.py @@ -0,0 +1,171 @@ +""" nmigen operator functions / utils + + eq: + -- + + a strategically very important function that is identical in function + to nmigen's Signal.eq function, except it may take objects, or a list + of objects, or a tuple of objects, and where objects may also be + Records. +""" + +from nmigen import Signal, Cat, Const, Mux, Module, Value, Elaboratable +from nmigen.cli import verilog, rtlil +from nmigen.lib.fifo import SyncFIFO, SyncFIFOBuffered +from nmigen.hdl.ast import ArrayProxy +from nmigen.hdl.rec import Record, Layout + +from abc import ABCMeta, abstractmethod +from collections.abc import Sequence, Iterable +from collections import OrderedDict +from queue import Queue +import inspect + + +class Visitor2: + """ a helper class for iterating twin-argument compound data structures. + + Record is a special (unusual, recursive) case, where the input may be + specified as a dictionary (which may contain further dictionaries, + recursively), where the field names of the dictionary must match + the Record's field spec. Alternatively, an object with the same + member names as the Record may be assigned: it does not have to + *be* a Record. + + ArrayProxy is also special-cased, it's a bit messy: whilst ArrayProxy + has an eq function, the object being assigned to it (e.g. a python + object) might not. despite the *input* having an eq function, + that doesn't help us, because it's the *ArrayProxy* that's being + assigned to. so.... we cheat. use the ports() function of the + python object, enumerate them, find out the list of Signals that way, + and assign them. 
+ """ + def iterator2(self, o, i): + if isinstance(o, dict): + yield from self.dict_iter2(o, i) + + if not isinstance(o, Sequence): + o, i = [o], [i] + for (ao, ai) in zip(o, i): + #print ("visit", fn, ao, ai) + if isinstance(ao, Record): + yield from self.record_iter2(ao, ai) + elif isinstance(ao, ArrayProxy) and not isinstance(ai, Value): + yield from self.arrayproxy_iter2(ao, ai) + else: + yield (ao, ai) + + def dict_iter2(self, o, i): + for (k, v) in o.items(): + print ("d-iter", v, i[k]) + yield (v, i[k]) + return res + + def _not_quite_working_with_all_unit_tests_record_iter2(self, ao, ai): + print ("record_iter2", ao, ai, type(ao), type(ai)) + if isinstance(ai, Value): + if isinstance(ao, Sequence): + ao, ai = [ao], [ai] + for o, i in zip(ao, ai): + yield (o, i) + return + for idx, (field_name, field_shape, _) in enumerate(ao.layout): + if isinstance(field_shape, Layout): + val = ai.fields + else: + val = ai + if hasattr(val, field_name): # check for attribute + val = getattr(val, field_name) + else: + val = val[field_name] # dictionary-style specification + yield from self.iterator2(ao.fields[field_name], val) + + def record_iter2(self, ao, ai): + for idx, (field_name, field_shape, _) in enumerate(ao.layout): + if isinstance(field_shape, Layout): + val = ai.fields + else: + val = ai + if hasattr(val, field_name): # check for attribute + val = getattr(val, field_name) + else: + val = val[field_name] # dictionary-style specification + yield from self.iterator2(ao.fields[field_name], val) + + def arrayproxy_iter2(self, ao, ai): + for p in ai.ports(): + op = getattr(ao, p.name) + print ("arrayproxy - p", p, p.name) + yield from self.iterator2(op, p) + + +class Visitor: + """ a helper class for iterating single-argument compound data structures. + similar to Visitor2. 
+ """ + def iterate(self, i): + """ iterate a compound structure recursively using yield + """ + if not isinstance(i, Sequence): + i = [i] + for ai in i: + #print ("iterate", ai) + if isinstance(ai, Record): + #print ("record", list(ai.layout)) + yield from self.record_iter(ai) + elif isinstance(ai, ArrayProxy) and not isinstance(ai, Value): + yield from self.array_iter(ai) + else: + yield ai + + def record_iter(self, ai): + for idx, (field_name, field_shape, _) in enumerate(ai.layout): + if isinstance(field_shape, Layout): + val = ai.fields + else: + val = ai + if hasattr(val, field_name): # check for attribute + val = getattr(val, field_name) + else: + val = val[field_name] # dictionary-style specification + #print ("recidx", idx, field_name, field_shape, val) + yield from self.iterate(val) + + def array_iter(self, ai): + for p in ai.ports(): + yield from self.iterate(p) + + +def eq(o, i): + """ makes signals equal: a helper routine which identifies if it is being + passed a list (or tuple) of objects, or signals, or Records, and calls + the objects' eq function. + """ + res = [] + for (ao, ai) in Visitor2().iterator2(o, i): + rres = ao.eq(ai) + if not isinstance(rres, Sequence): + rres = [rres] + res += rres + return res + + +def shape(i): + #print ("shape", i) + r = 0 + for part in list(i): + #print ("shape?", part) + s, _ = part.shape() + r += s + return r, False + + +def cat(i): + """ flattens a compound structure recursively using Cat + """ + from nmigen.tools import flatten + #res = list(flatten(i)) # works (as of nmigen commit f22106e5) HOWEVER... + res = list(Visitor().iterate(i)) # needed because input may be a sequence + return Cat(*res) + + diff --git a/src/ieee754/add/pipeline.py b/src/ieee754/add/pipeline.py new file mode 100644 index 00000000..afcee743 --- /dev/null +++ b/src/ieee754/add/pipeline.py @@ -0,0 +1,394 @@ +""" Example 5: Making use of PyRTL and Introspection. 
""" + +from collections.abc import Sequence + +from nmigen import Signal +from nmigen.hdl.rec import Record +from nmigen import tracer +from nmigen.compat.fhdl.bitcontainer import value_bits_sign +from contextlib import contextmanager + +from nmoperator import eq +from singlepipe import StageCls, ControlBase, BufferedHandshake +from singlepipe import UnbufferedPipeline + + +# The following example shows how pyrtl can be used to make some interesting +# hardware structures using python introspection. In particular, this example +# makes a N-stage pipeline structure. Any specific pipeline is then a derived +# class of SimplePipeline where methods with names starting with "stage" are +# stages, and new members with names not starting with "_" are to be registered +# for the next stage. + +def like(value, rname, pipe, pipemode=False): + if isinstance(value, ObjectProxy): + return ObjectProxy.like(pipe, value, pipemode=pipemode, + name=rname, reset_less=True) + else: + return Signal(value_bits_sign(value), name=rname, + reset_less=True) + return Signal.like(value, name=rname, reset_less=True) + +def get_assigns(_assigns): + assigns = [] + for e in _assigns: + if isinstance(e, ObjectProxy): + assigns += get_assigns(e._assigns) + else: + assigns.append(e) + return assigns + + +def get_eqs(_eqs): + eqs = [] + for e in _eqs: + if isinstance(e, ObjectProxy): + eqs += get_eqs(e._eqs) + else: + eqs.append(e) + return eqs + + +class ObjectProxy: + def __init__(self, m, name=None, pipemode=False, syncmode=True): + self._m = m + if name is None: + name = tracer.get_var_name(default=None) + self.name = name + self._pipemode = pipemode + self._syncmode = syncmode + self._eqs = {} + self._assigns = [] + self._preg_map = {} + + @classmethod + def like(cls, m, value, pipemode=False, name=None, src_loc_at=0, **kwargs): + name = name or tracer.get_var_name(depth=2 + src_loc_at, + default="$like") + + src_loc_at_1 = 1 + src_loc_at + r = ObjectProxy(m, value.name, pipemode) + #for a, 
aname in value._preg_map.items(): + # r._preg_map[aname] = like(a, aname, m, pipemode) + for a in value.ports(): + aname = a.name + r._preg_map[aname] = like(a, aname, m, pipemode) + return r + + def __repr__(self): + subobjs = [] + for a in self.ports(): + aname = a.name + ai = self._preg_map[aname] + subobjs.append(repr(ai)) + return "" % subobjs + + def get_specs(self, liked=False): + res = [] + for k, v in self._preg_map.items(): + #v = like(v, k, stage._m) + res.append(v) + if isinstance(v, ObjectProxy): + res += v.get_specs() + return res + + def eq(self, i): + print ("ObjectProxy eq", self, i) + res = [] + for a in self.ports(): + aname = a.name + ai = i._preg_map[aname] + res.append(a.eq(ai)) + return res + + def ports(self): + res = [] + for aname, a in self._preg_map.items(): + if isinstance(a, Signal) or isinstance(a, ObjectProxy) or \ + isinstance(a, Record): + res.append(a) + #print ("ObjectPorts", res) + return res + + def __getattr__(self, name): + try: + v = self._preg_map[name] + return v + #return like(v, name, self._m) + except KeyError: + raise AttributeError( + 'error, no pipeline register "%s" defined for OP %s' + % (name, self.name)) + + def __setattr__(self, name, value): + if name.startswith('_') or name in ['name', 'ports', 'eq', 'like']: + # do not do anything tricky with variables starting with '_' + object.__setattr__(self, name, value) + return + #rname = "%s_%s" % (self.name, name) + rname = name + new_pipereg = like(value, rname, self._m, self._pipemode) + self._preg_map[name] = new_pipereg + #object.__setattr__(self, name, new_pipereg) + if self._pipemode: + #print ("OP pipemode", self._syncmode, new_pipereg, value) + assign = eq(new_pipereg, value) + if self._syncmode: + self._m.d.sync += assign + else: + self._m.d.comb += assign + elif self._m: + #print ("OP !pipemode assign", new_pipereg, value, type(value)) + self._m.d.comb += eq(new_pipereg, value) + else: + #print ("OP !pipemode !m", new_pipereg, value, type(value)) + 
self._assigns += eq(new_pipereg, value) + if isinstance(value, ObjectProxy): + #print ("OP, defer assigns:", value._assigns) + self._assigns += value._assigns + self._eqs.append(value._eqs) + + +class PipelineStage: + """ Pipeline builder stage with auto generation of pipeline registers. + """ + + def __init__(self, name, m, prev=None, pipemode=False, ispec=None): + self._m = m + self._stagename = name + self._preg_map = {'__nextstage__': {}} + self._prev_stage = prev + self._ispec = ispec + if ispec: + self._preg_map[self._stagename] = ispec + if prev: + print ("prev", prev._stagename, prev._preg_map) + #if prev._stagename in prev._preg_map: + # m = prev._preg_map[prev._stagename] + # self._preg_map[prev._stagename] = m + if '__nextstage__' in prev._preg_map: + m = prev._preg_map['__nextstage__'] + m = likedict(m) + self._preg_map[self._stagename] = m + #for k, v in m.items(): + #m[k] = like(v, k, self._m) + print ("make current", self._stagename, m) + self._pipemode = pipemode + self._eqs = {} + self._assigns = [] + + def __getattribute__(self, name): + if name.startswith('_'): + return object.__getattribute__(self, name) + #if name in self._preg_map['__nextstage__']: + # return self._preg_map['__nextstage__'][name] + try: + print ("getattr", name, object.__getattribute__(self, '_preg_map')) + v = self._preg_map[self._stagename][name] + return v + #return like(v, name, self._m) + except KeyError: + raise AttributeError( + 'error, no pipeline register "%s" defined for stage %s' + % (name, self._stagename)) + + def __setattr__(self, name, value): + if name.startswith('_'): + # do not do anything tricky with variables starting with '_' + object.__setattr__(self, name, value) + return + pipereg_id = self._stagename + rname = 'pipereg_' + pipereg_id + '_' + name + new_pipereg = like(value, rname, self._m, self._pipemode) + next_stage = '__nextstage__' + if next_stage not in self._preg_map: + self._preg_map[next_stage] = {} + self._preg_map[next_stage][name] = 
new_pipereg + print ("setattr", name, value, self._preg_map) + if self._pipemode: + self._eqs[name] = new_pipereg + assign = eq(new_pipereg, value) + print ("pipemode: append", new_pipereg, value, assign) + if isinstance(value, ObjectProxy): + print ("OP, assigns:", value._assigns) + self._assigns += value._assigns + self._eqs[name]._eqs = value._eqs + #self._m.d.comb += assign + self._assigns += assign + elif self._m: + print ("!pipemode: assign", new_pipereg, value) + assign = eq(new_pipereg, value) + self._m.d.sync += assign + else: + print ("!pipemode !m: defer assign", new_pipereg, value) + assign = eq(new_pipereg, value) + self._eqs[name] = new_pipereg + self._assigns += assign + if isinstance(value, ObjectProxy): + print ("OP, defer assigns:", value._assigns) + self._assigns += value._assigns + self._eqs[name]._eqs = value._eqs + +def likelist(specs): + res = [] + for v in specs: + res.append(like(v, v.name, None, pipemode=True)) + return res + +def likedict(specs): + if not isinstance(specs, dict): + return like(specs, specs.name, None, pipemode=True) + res = {} + for k, v in specs.items(): + res[k] = likedict(v) + return res + + +class AutoStage(StageCls): + def __init__(self, inspecs, outspecs, eqs, assigns): + self.inspecs, self.outspecs = inspecs, outspecs + self.eqs, self.assigns = eqs, assigns + #self.o = self.ospec() + def ispec(self): return likedict(self.inspecs) + def ospec(self): return likedict(self.outspecs) + + def process(self, i): + print ("stage process", i) + return self.eqs + + def setup(self, m, i): + print ("stage setup i", i, m) + print ("stage setup inspecs", self.inspecs) + print ("stage setup outspecs", self.outspecs) + print ("stage setup eqs", self.eqs) + #self.o = self.ospec() + m.d.comb += eq(self.inspecs, i) + #m.d.comb += eq(self.outspecs, self.eqs) + #m.d.comb += eq(self.o, i) + + +class AutoPipe(UnbufferedPipeline): + def __init__(self, stage, assigns): + UnbufferedPipeline.__init__(self, stage) + self.assigns = assigns + + 
def elaborate(self, platform): + m = UnbufferedPipeline.elaborate(self, platform) + m.d.comb += self.assigns + print ("assigns", self.assigns, m) + return m + + +class PipeManager: + def __init__(self, m, pipemode=False, pipetype=None): + self.m = m + self.pipemode = pipemode + self.pipetype = pipetype + + @contextmanager + def Stage(self, name, prev=None, ispec=None): + if ispec: + ispec = likedict(ispec) + print ("start stage", name, ispec) + stage = PipelineStage(name, None, prev, self.pipemode, ispec=ispec) + try: + yield stage, self.m #stage._m + finally: + pass + if self.pipemode: + if stage._ispec: + print ("use ispec", stage._ispec) + inspecs = stage._ispec + else: + inspecs = self.get_specs(stage, name) + #inspecs = likedict(inspecs) + outspecs = self.get_specs(stage, '__nextstage__', liked=True) + print ("stage inspecs", name, inspecs) + print ("stage outspecs", name, outspecs) + eqs = stage._eqs # get_eqs(stage._eqs) + assigns = get_assigns(stage._assigns) + print ("stage eqs", name, eqs) + print ("stage assigns", name, assigns) + s = AutoStage(inspecs, outspecs, eqs, assigns) + self.stages.append(s) + print ("end stage", name, self.pipemode, "\n") + + def get_specs(self, stage, name, liked=False): + return stage._preg_map[name] + if name in stage._preg_map: + res = [] + for k, v in stage._preg_map[name].items(): + #v = like(v, k, stage._m) + res.append(v) + #if isinstance(v, ObjectProxy): + # res += v.get_specs() + return res + return {} + + def __enter__(self): + self.stages = [] + return self + + def __exit__(self, *args): + print ("exit stage", args) + pipes = [] + cb = ControlBase() + for s in self.stages: + print ("stage specs", s, s.inspecs, s.outspecs) + if self.pipetype == 'buffered': + p = BufferedHandshake(s) + else: + p = AutoPipe(s, s.assigns) + pipes.append(p) + self.m.submodules += p + + self.m.d.comb += cb.connect(pipes) + + +class SimplePipeline: + """ Pipeline builder with auto generation of pipeline registers. 
+ """ + + def __init__(self, m): + self._m = m + self._pipeline_register_map = {} + self._current_stage_num = 0 + + def _setup(self): + stage_list = [] + for method in dir(self): + if method.startswith('stage'): + stage_list.append(method) + for stage in sorted(stage_list): + stage_method = getattr(self, stage) + stage_method() + self._current_stage_num += 1 + + def __getattr__(self, name): + try: + return self._pipeline_register_map[self._current_stage_num][name] + except KeyError: + raise AttributeError( + 'error, no pipeline register "%s" defined for stage %d' + % (name, self._current_stage_num)) + + def __setattr__(self, name, value): + if name.startswith('_'): + # do not do anything tricky with variables starting with '_' + object.__setattr__(self, name, value) + return + next_stage = self._current_stage_num + 1 + pipereg_id = str(self._current_stage_num) + 'to' + str(next_stage) + rname = 'pipereg_' + pipereg_id + '_' + name + #new_pipereg = Signal(value_bits_sign(value), name=rname, + # reset_less=True) + if isinstance(value, ObjectProxy): + new_pipereg = ObjectProxy.like(self._m, value, + name=rname, reset_less = True) + else: + new_pipereg = Signal.like(value, name=rname, reset_less = True) + if next_stage not in self._pipeline_register_map: + self._pipeline_register_map[next_stage] = {} + self._pipeline_register_map[next_stage][name] = new_pipereg + self._m.d.sync += eq(new_pipereg, value) + diff --git a/src/ieee754/add/pipeline_example.py b/src/ieee754/add/pipeline_example.py new file mode 100644 index 00000000..799caf6d --- /dev/null +++ b/src/ieee754/add/pipeline_example.py @@ -0,0 +1,204 @@ +""" Example 5: Making use of PyRTL and Introspection. """ + +from nmigen import Module, Signal, Const +from nmigen.cli import main, verilog, rtlil + + +from pipeline import SimplePipeline, ObjectProxy, PipeManager + + +class SimplePipelineExample(SimplePipeline): + """ A very simple pipeline to show how registers are inferred. 
""" + + def __init__(self, pipe): + SimplePipeline.__init__(self, pipe) + self._loopback = Signal(4) + self._setup() + + def stage0(self): + self.n = ~self._loopback + + def stage1(self): + self.n = self.n + 2 + + def stage2(self): + localv = Signal(4) + self._pipe.comb += localv.eq(2) + self.n = self.n << localv + + def stage3(self): + self.n = ~self.n + + def stage4(self): + self._pipe.sync += self._loopback.eq(self.n + 3) + + +class ObjectBasedPipelineExample(SimplePipeline): + """ A very simple pipeline to show how registers are inferred. """ + + def __init__(self, m): + SimplePipeline.__init__(self, m) + self._loopback = Signal(4) + o = ObjectProxy(m) + o.a = Signal(4) + o.b = Signal(4) + self._obj = o + self._setup() + + def stage0(self): + self.n = ~self._loopback + self.o = self._obj + + def stage1(self): + self.n = self.n + self.o.a + o = ObjectProxy(self._m) + o.c = self.n + o.d = self.o.b + self.n + Const(5) + self.o = o + + def stage2(self): + localv = Signal(4) + self._m.d.comb += localv.eq(2) + self.n = self.n << localv + o = ObjectProxy(self._m) + o.e = self.n + self.o.c + self.o.d + self.o = o + + def stage3(self): + self.n = ~self.n + self.o = self.o + self.o.e = self.o.e + self.n + + def stage4(self): + self._m.d.sync += self._loopback.eq(self.n + 3 + self.o.e) + + +class PipeModule: + + def __init__(self): + self.m = Module() + self.p = ObjectBasedPipelineExample(self.m) + + def elaborate(self, platform=None): + return self.m + + +class PipelineStageExample: + + def __init__(self): + self._loopback = Signal(4, name="loopback") + + def elaborate(self, platform=None): + + m = Module() + + with PipeManager(m, pipemode=True) as pipe: + + ispec={'loopback': self._loopback} + with pipe.Stage("first", ispec=ispec) as (p, m): + p.n = ~p.loopback + with pipe.Stage("second", p) as (p, m): + #p.n = ~self._loopback + 2 + p.n = p.n + Const(2) + with pipe.Stage("third", p) as (p, m): + #p.n = ~self._loopback + 5 + localv = Signal(4) + m.d.comb += localv.eq(2) 
+ p.n = p.n << localv + Const(1) + #p.m = p.n + 2 + + print (pipe.stages) + + return m + +class PipelineStageObjectExample: + + def __init__(self): + self.loopback = Signal(4) + + def elaborate(self, platform=None): + + m = Module() + + o = ObjectProxy(None, pipemode=False) + o.a = Signal(4) + o.b = Signal(4) + self.obj = o + + localv2 = Signal(4) + m.d.sync += localv2.eq(localv2 + 3) + + #m.d.comb += self.obj.a.eq(localv2 + 1) + #m.d.sync += self._loopback.eq(localv2) + + ispec= {'loopback': self.loopback, 'obj': self.obj} + with PipeManager(m, pipemode=True) as pipe: + + with pipe.Stage("first", ispec=ispec) as (p, m): + p.n = ~p.loopback + p.o = p.obj + with pipe.Stage("second", p) as (p, m): + #p.n = ~self.loopback + 2 + localn = Signal(4) + m.d.comb += localn.eq(p.n) + o = ObjectProxy(None, pipemode=False) + o.c = localn + o.d = p.o.b + localn + Const(5) + p.n = localn + p.o = o + with pipe.Stage("third", p) as (p, m): + #p.n = ~self._loopback + 5 + localv = Signal(4) + m.d.comb += localv.eq(2) + p.n = p.n << localv + o = ObjectProxy(None, pipemode=False) + o.e = p.n + p.o.c + p.o.d + p.o = o + + print ("stages", pipe.stages) + + return m + + +class PipelineStageObjectExample2: + + def __init__(self): + self._loopback = Signal(4) + + def elaborate(self, platform=None): + + m = Module() + + ispec= [self._loopback] + with PipeManager(m, pipemode=True) as pipe: + + with pipe.Stage("first", + ispec=ispec) as (p, m): + p.n = ~self._loopback + o = ObjectProxy(None, pipemode=False) + o.b = ~self._loopback + Const(5) + p.o = o + + print ("stages", pipe.stages) + + return m + + + +if __name__ == "__main__": + example = PipeModule() + with open("pipe_module.il", "w") as f: + f.write(rtlil.convert(example, ports=[ + example.p._loopback, + ])) + example = PipelineStageExample() + with open("pipe_stage_module.il", "w") as f: + f.write(rtlil.convert(example, ports=[ + example._loopback, + ])) + #exit(0) + example = PipelineStageObjectExample() + with 
open("pipe_stage_object_module.il", "w") as f: + f.write(rtlil.convert(example, ports=[ + example.loopback, + ])) diff --git a/src/ieee754/add/queue.py b/src/ieee754/add/queue.py new file mode 100644 index 00000000..0038953d --- /dev/null +++ b/src/ieee754/add/queue.py @@ -0,0 +1,190 @@ +# Copyright (c) 2014 - 2019 The Regents of the University of +# California (Regents). All Rights Reserved. Redistribution and use in +# source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# two paragraphs of disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# two paragraphs of disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the Regents nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, +# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF +# REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF +# ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION +# TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +# MODIFICATIONS. 
+ +from nmigen import Module, Signal, Memory, Mux, Elaboratable +from nmigen.tools import bits_for +from nmigen.cli import main +from nmigen.lib.fifo import FIFOInterface + +# translated from https://github.com/freechipsproject/chisel3/blob/a4a29e29c3f1eed18f851dcf10bdc845571dfcb6/src/main/scala/chisel3/util/Decoupled.scala#L185 # noqa + + +class Queue(FIFOInterface, Elaboratable): + def __init__(self, width, depth, fwft=True, pipe=False): + """ Queue (FIFO) with pipe mode and first-write fall-through capability + + * :width: width of Queue data in/out + * :depth: queue depth. NOTE: may be set to 0 (this is ok) + * :fwft : first-write, fall-through mode (Chisel Queue "flow" mode) + * :pipe : pipe mode. NOTE: this mode can cause unanticipated + problems. when read is enabled, so is writeable. + therefore if read is enabled, the data ABSOLUTELY MUST + be read. + + fwft mode = True basically means that the data may be transferred + combinatorially from input to output. + + Attributes: + * level: available free space (number of unread entries) + + din = enq_data, writable = enq_ready, we = enq_valid + dout = deq_data, re = deq_ready, readable = deq_valid + """ + FIFOInterface.__init__(self, width, depth, fwft) + self.pipe = pipe + self.depth = depth + self.level = Signal(bits_for(depth)) + + def elaborate(self, platform): + m = Module() + + # set up an SRAM. 
XXX bug in Memory: cannot create SRAM of depth 1 + ram = Memory(self.width, self.depth if self.depth > 1 else 2) + m.submodules.ram_read = ram_read = ram.read_port(synchronous=False) + m.submodules.ram_write = ram_write = ram.write_port() + + # convenience names + p_ready_o = self.writable + p_valid_i = self.we + enq_data = self.din + + n_valid_o = self.readable + n_ready_i = self.re + deq_data = self.dout + + # intermediaries + ptr_width = bits_for(self.depth - 1) if self.depth > 1 else 0 + enq_ptr = Signal(ptr_width) # cyclic pointer to "insert" point (wrport) + deq_ptr = Signal(ptr_width) # cyclic pointer to "remove" point (rdport) + maybe_full = Signal() # not reset_less (set by sync) + + # temporaries + do_enq = Signal(reset_less=True) + do_deq = Signal(reset_less=True) + ptr_diff = Signal(ptr_width) + ptr_match = Signal(reset_less=True) + empty = Signal(reset_less=True) + full = Signal(reset_less=True) + enq_max = Signal(reset_less=True) + deq_max = Signal(reset_less=True) + + m.d.comb += [ptr_match.eq(enq_ptr == deq_ptr), # read-ptr = write-ptr + ptr_diff.eq(enq_ptr - deq_ptr), + enq_max.eq(enq_ptr == self.depth - 1), + deq_max.eq(deq_ptr == self.depth - 1), + empty.eq(ptr_match & ~maybe_full), + full.eq(ptr_match & maybe_full), + do_enq.eq(p_ready_o & p_valid_i), # write conditions ok + do_deq.eq(n_ready_i & n_valid_o), # read conditions ok + + # set readable and writable (NOTE: see pipe mode below) + n_valid_o.eq(~empty), # cannot read if empty! + p_ready_o.eq(~full), # cannot write if full! 
+ + # set up memory and connect to input and output + ram_write.addr.eq(enq_ptr), + ram_write.data.eq(enq_data), + ram_write.en.eq(do_enq), + ram_read.addr.eq(deq_ptr), + deq_data.eq(ram_read.data) # NOTE: overridden in fwft mode + ] + + # under write conditions, SRAM write-pointer moves on next clock + with m.If(do_enq): + m.d.sync += enq_ptr.eq(Mux(enq_max, 0, enq_ptr+1)) + + # under read conditions, SRAM read-pointer moves on next clock + with m.If(do_deq): + m.d.sync += deq_ptr.eq(Mux(deq_max, 0, deq_ptr+1)) + + # if read-but-not-write or write-but-not-read, maybe_full set + with m.If(do_enq != do_deq): + m.d.sync += maybe_full.eq(do_enq) + + # first-word fall-through: same as "flow" parameter in Chisel3 Queue + # basically instead of relying on the Memory characteristics (which + # in FPGAs do not have write-through), then when the queue is empty + # take the output directly from the input, i.e. *bypass* the SRAM. + # this done combinatorially to give the exact same characteristics + # as Memory "write-through"... without relying on a changing API + if self.fwft: + with m.If(p_valid_i): + m.d.comb += n_valid_o.eq(1) + with m.If(empty): + m.d.comb += deq_data.eq(enq_data) + m.d.comb += do_deq.eq(0) + with m.If(n_ready_i): + m.d.comb += do_enq.eq(0) + + # pipe mode: if next stage says it's ready (readable), we + # *must* declare the input ready (writeable). 
+ if self.pipe: + with m.If(n_ready_i): + m.d.comb += p_ready_o.eq(1) + + # set the count (available free space), optimise on power-of-two + if self.depth == 1 << ptr_width: # is depth a power of 2 + m.d.comb += self.level.eq( + Mux(maybe_full & ptr_match, self.depth, 0) | ptr_diff) + else: + m.d.comb += self.level.eq(Mux(ptr_match, + Mux(maybe_full, self.depth, 0), + Mux(deq_ptr > enq_ptr, + self.depth + ptr_diff, + ptr_diff))) + + return m + + +if __name__ == "__main__": + reg_stage = Queue(1, 1, pipe=True) + break_ready_chain_stage = Queue(1, 1, pipe=True, fwft=True) + m = Module() + ports = [] + + def queue_ports(queue, name_prefix): + retval = [] + for name in ["level", + "dout", + "readable", + "writable"]: + port = getattr(queue, name) + signal = Signal(port.shape(), name=name_prefix+name) + m.d.comb += signal.eq(port) + retval.append(signal) + for name in ["re", + "din", + "we"]: + port = getattr(queue, name) + signal = Signal(port.shape(), name=name_prefix+name) + m.d.comb += port.eq(signal) + retval.append(signal) + return retval + + m.submodules.reg_stage = reg_stage + ports += queue_ports(reg_stage, "reg_stage_") + m.submodules.break_ready_chain_stage = break_ready_chain_stage + ports += queue_ports(break_ready_chain_stage, "break_ready_chain_stage_") + main(m, ports=ports) diff --git a/src/ieee754/add/record_experiment.py b/src/ieee754/add/record_experiment.py new file mode 100644 index 00000000..1789c3bd --- /dev/null +++ b/src/ieee754/add/record_experiment.py @@ -0,0 +1,106 @@ +from nmigen import Module, Signal, Mux, Const, Elaboratable +from nmigen.hdl.rec import Record, Layout, DIR_NONE +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen.compat.fhdl.bitcontainer import value_bits_sign +from singlepipe import cat, RecordObject + + +class RecordTest: + + def __init__(self): + self.r1 = RecordObject() + self.r1.sig1 = Signal(16) + self.r1.r2 = RecordObject() + self.r1.r2.sig2 = Signal(16) + self.r1.r3 = 
RecordObject() + self.r1.r3.sig3 = Signal(16) + self.sig123 = Signal(48) + + def elaborate(self, platform): + m = Module() + + sig1 = Signal(16) + m.d.comb += sig1.eq(self.r1.sig1) + sig2 = Signal(16) + m.d.comb += sig2.eq(self.r1.r2.sig2) + + print (self.r1.fields) + print (self.r1.shape()) + print ("width", len(self.r1)) + m.d.comb += self.sig123.eq(cat(self.r1)) + + return m + + +def testbench(dut): + yield dut.r1.sig1.eq(5) + yield dut.r1.r2.sig2.eq(10) + yield dut.r1.r3.sig3.eq(1) + + sig1 = yield dut.r1.sig1 + assert sig1 == 5 + sig2 = yield dut.r1.r2.sig2 + assert sig2 == 10 + + yield + + sig123 = yield dut.sig123 + print ("sig123", hex(sig123)) + assert sig123 == 0x1000a0005 + + + +class RecordTest2(Elaboratable): + + def __init__(self): + self.r1 = RecordObject() + self.r1.sig1 = Signal(16) + self.r1.r2 = RecordObject() + self.r1.r2.sig2 = Signal(16) + self.r1.r3 = RecordObject() + self.r1.r3.sig3 = Signal(16) + self.sig123 = Signal(48) + + def elaborate(self, platform): + m = Module() + + m.d.comb += cat(self.r1).eq(self.sig123) + + return m + + +def testbench2(dut): + + sig123 = yield dut.sig123.eq(0x1000a0005) + + yield + + sig1 = yield dut.r1.sig1 + assert sig1 == 5 + sig2 = yield dut.r1.r2.sig2 + assert sig2 == 10 + sig3 = yield dut.r1.r3.sig3 + assert sig3 == 1 + + + +###################################################################### +# Unit Tests +###################################################################### + +if __name__ == '__main__': + print ("test 1") + dut = RecordTest() + run_simulation(dut, testbench(dut), vcd_name="test_record1.vcd") + vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2]) + with open("test_record1.il", "w") as f: + f.write(vl) + + print ("test 2") + dut = RecordTest2() + run_simulation(dut, testbench2(dut), vcd_name="test_record2.vcd") + vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2]) + with open("test_record2.il", "w") as f: + f.write(vl) + diff --git 
a/src/ieee754/add/rstation_row.py b/src/ieee754/add/rstation_row.py new file mode 100644 index 00000000..aeb58732 --- /dev/null +++ b/src/ieee754/add/rstation_row.py @@ -0,0 +1,39 @@ +from nmigen import Signal, Cat, Const, Mux, Module + +from nmigen.cli import main, verilog + +from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase +from fpbase import MultiShiftRMerge + +class ReservationStationRow: + + def __init__(self, width, id_wid): + """ Reservation Station row + + * width: bit-width of IEEE754. supported: 16, 32, 64 + * id_wid: an identifier to be passed through to the FunctionUnit + """ + self.width = width + + self.in_a = Signal(width) + self.in_b = Signal(width) + self.id_wid = id_wid + self.out_z = Signal(width) + + def elaborate(self, platform=None): + """ creates the HDL code-fragment for ReservationStationRow + """ + m = Module() + + return m + + +if __name__ == "__main__": + rs = ReservationStationRow(width=32, id_wid=Const(1,4)) + main(alu, ports=[rs.in_a, rs.in_b, rs.out_z] + + # works... but don't use, just do "python fname.py convert -t v" + #print (verilog.convert(alu, ports=[ + # ports=alu.in_a.ports() + \ + # alu.in_b.ports() + \ + # alu.out_z.ports()) diff --git a/src/ieee754/add/singlepipe.py b/src/ieee754/add/singlepipe.py new file mode 100644 index 00000000..68b62e43 --- /dev/null +++ b/src/ieee754/add/singlepipe.py @@ -0,0 +1,829 @@ +""" Pipeline API. For multi-input and multi-output variants, see multipipe. + + Associated development bugs: + * http://bugs.libre-riscv.org/show_bug.cgi?id=64 + * http://bugs.libre-riscv.org/show_bug.cgi?id=57 + + Important: see Stage API (stageapi.py) in combination with below + + RecordBasedStage: + ---------------- + + A convenience class that takes an input shape, output shape, a + "processing" function and an optional "setup" function. Honestly + though, there's not much more effort to just... create a class + that returns a couple of Records (see ExampleAddRecordStage in + examples). 
+ + PassThroughStage: + ---------------- + + A convenience class that takes a single function as a parameter, + that is chain-called to create the exact same input and output spec. + It has a process() function that simply returns its input. + + Instances of this class are completely redundant if handed to + StageChain, however when passed to UnbufferedPipeline they + can be used to introduce a single clock delay. + + ControlBase: + ----------- + + The base class for pipelines. Contains previous and next ready/valid/data. + Also has an extremely useful "connect" function that can be used to + connect a chain of pipelines and present the exact same prev/next + ready/valid/data API. + + Note: pipelines basically do not become pipelines as such until + handed to a derivative of ControlBase. ControlBase itself is *not* + strictly considered a pipeline class. Wishbone and AXI4 (master or + slave) could be derived from ControlBase, for example. + UnbufferedPipeline: + ------------------ + + A simple stalling clock-synchronised pipeline that has no buffering + (unlike BufferedHandshake). Data flows on *every* clock cycle when + the conditions are right (this is nominally when the input is valid + and the output is ready). + + A stall anywhere along the line will result in a stall back-propagating + down the entire chain. The BufferedHandshake by contrast will buffer + incoming data, allowing previous stages one clock cycle's grace before + also having to stall. + + An advantage of the UnbufferedPipeline over the Buffered one is + that the amount of logic needed (number of gates) is greatly + reduced (no second set of buffers basically) + + The disadvantage of the UnbufferedPipeline is that the valid/ready + logic, if chained together, is *combinatorial*, resulting in + progressively larger gate delay. + + PassThroughHandshake: + ------------------ + + A Control class that introduces a single clock delay, passing its + data through unaltered. 
Unlike RegisterPipeline (which relies + on UnbufferedPipeline and PassThroughStage) it handles ready/valid + itself. + + RegisterPipeline: + ---------------- + + A convenience class that, because UnbufferedPipeline introduces a single + clock delay, when its stage is a PassThroughStage, it results in a Pipeline + stage that, duh, delays its (unmodified) input by one clock cycle. + + BufferedHandshake: + ---------------- + + nmigen implementation of buffered pipeline stage, based on zipcpu: + https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html + + this module requires quite a bit of thought to understand how it works + (and why it is needed in the first place). reading the above is + *strongly* recommended. + + unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires + the STB / ACK signals to raise and lower (on separate clocks) before + data may proceeed (thus only allowing one piece of data to proceed + on *ALTERNATE* cycles), the signalling here is a true pipeline + where data will flow on *every* clock when the conditions are right. + + input acceptance conditions are when: + * incoming previous-stage strobe (p.valid_i) is HIGH + * outgoing previous-stage ready (p.ready_o) is LOW + + output transmission conditions are when: + * outgoing next-stage strobe (n.valid_o) is HIGH + * outgoing next-stage ready (n.ready_i) is LOW + + the tricky bit is when the input has valid data and the output is not + ready to accept it. if it wasn't for the clock synchronisation, it + would be possible to tell the input "hey don't send that data, we're + not ready". unfortunately, it's not possible to "change the past": + the previous stage *has no choice* but to pass on its data. + + therefore, the incoming data *must* be accepted - and stored: that + is the responsibility / contract that this stage *must* accept. + on the same clock, it's possible to tell the input that it must + not send any more data. this is the "stall" condition. 
+ + we now effectively have *two* possible pieces of data to "choose" from: + the buffered data, and the incoming data. the decision as to which + to process and output is based on whether we are in "stall" or not. + i.e. when the next stage is no longer ready, the output comes from + the buffer if a stall had previously occurred, otherwise it comes + direct from processing the input. + + this allows us to respect a synchronous "travelling STB" with what + dan calls a "buffered handshake". + + it's quite a complex state machine! + + SimpleHandshake + --------------- + + Synchronised pipeline, Based on: + https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v +""" + +from nmigen import Signal, Mux, Module, Elaboratable +from nmigen.cli import verilog, rtlil +from nmigen.hdl.rec import Record + +from queue import Queue +import inspect + +from iocontrol import (PrevControl, NextControl, Object, RecordObject) +from stageapi import (_spec, StageCls, Stage, StageChain, StageHelper) +import nmoperator + + +class RecordBasedStage(Stage): + """ convenience class which provides a Records-based layout. + honestly it's a lot easier just to create a direct Records-based + class (see ExampleAddRecordStage) + """ + def __init__(self, in_shape, out_shape, processfn, setupfn=None): + self.in_shape = in_shape + self.out_shape = out_shape + self.__process = processfn + self.__setup = setupfn + def ispec(self): return Record(self.in_shape) + def ospec(self): return Record(self.out_shape) + def process(seif, i): return self.__process(i) + def setup(seif, m, i): return self.__setup(m, i) + + +class PassThroughStage(StageCls): + """ a pass-through stage with its input data spec identical to its output, + and "passes through" its data from input to output (does nothing). + + use this basically to explicitly make any data spec Stage-compliant. + (many APIs would potentially use a static "wrap" method in e.g. 
+ StageCls to achieve a similar effect) + """ + def __init__(self, iospecfn): self.iospecfn = iospecfn + def ispec(self): return self.iospecfn() + def ospec(self): return self.iospecfn() + + +class ControlBase(StageHelper, Elaboratable): + """ Common functions for Pipeline API. Note: a "pipeline stage" only + exists (conceptually) when a ControlBase derivative is handed + a Stage (combinatorial block) + + NOTE: ControlBase derives from StageHelper, making it accidentally + compliant with the Stage API. Using those functions directly + *BYPASSES* a ControlBase instance ready/valid signalling, which + clearly should not be done without a really, really good reason. + """ + def __init__(self, stage=None, in_multi=None, stage_ctl=False): + """ Base class containing ready/valid/data to previous and next stages + + * p: contains ready/valid to the previous stage + * n: contains ready/valid to the next stage + + Except when calling Controlbase.connect(), user must also: + * add data_i member to PrevControl (p) and + * add data_o member to NextControl (n) + Calling ControlBase._new_data is a good way to do that. + """ + StageHelper.__init__(self, stage) + + # set up input and output IO ACK (prev/next ready/valid) + self.p = PrevControl(in_multi, stage_ctl) + self.n = NextControl(stage_ctl) + + # set up the input and output data + if stage is not None: + self._new_data("data") + + def _new_data(self, name): + """ allocates new data_i and data_o + """ + self.p.data_i, self.n.data_o = self.new_specs(name) + + @property + def data_r(self): + return self.process(self.p.data_i) + + def connect_to_next(self, nxt): + """ helper function to connect to the next stage data/valid/ready. + """ + return self.n.connect_to_next(nxt.p) + + def _connect_in(self, prev): + """ internal helper function to connect stage to an input source. + do not use to connect stage-to-stage! 
+ """ + return self.p._connect_in(prev.p) + + def _connect_out(self, nxt): + """ internal helper function to connect stage to an output source. + do not use to connect stage-to-stage! + """ + return self.n._connect_out(nxt.n) + + def connect(self, pipechain): + """ connects a chain (list) of Pipeline instances together and + links them to this ControlBase instance: + + in <----> self <---> out + | ^ + v | + [pipe1, pipe2, pipe3, pipe4] + | ^ | ^ | ^ + v | v | v | + out---in out--in out---in + + Also takes care of allocating data_i/data_o, by looking up + the data spec for each end of the pipechain. i.e It is NOT + necessary to allocate self.p.data_i or self.n.data_o manually: + this is handled AUTOMATICALLY, here. + + Basically this function is the direct equivalent of StageChain, + except that unlike StageChain, the Pipeline logic is followed. + + Just as StageChain presents an object that conforms to the + Stage API from a list of objects that also conform to the + Stage API, an object that calls this Pipeline connect function + has the exact same pipeline API as the list of pipline objects + it is called with. + + Thus it becomes possible to build up larger chains recursively. + More complex chains (multi-input, multi-output) will have to be + done manually. 
+ + Argument: + + * :pipechain: - a sequence of ControlBase-derived classes + (must be one or more in length) + + Returns: + + * a list of eq assignments that will need to be added in + an elaborate() to m.d.comb + """ + assert len(pipechain) > 0, "pipechain must be non-zero length" + assert self.stage is None, "do not use connect with a stage" + eqs = [] # collated list of assignment statements + + # connect inter-chain + for i in range(len(pipechain)-1): + pipe1 = pipechain[i] # earlier + pipe2 = pipechain[i+1] # later (by 1) + eqs += pipe1.connect_to_next(pipe2) # earlier n to later p + + # connect front and back of chain to ourselves + front = pipechain[0] # first in chain + end = pipechain[-1] # last in chain + self.set_specs(front, end) # sets up ispec/ospec functions + self._new_data("chain") # NOTE: REPLACES existing data + eqs += front._connect_in(self) # front p to our p + eqs += end._connect_out(self) # end n to our n + + return eqs + + def set_input(self, i): + """ helper function to set the input data (used in unit tests) + """ + return nmoperator.eq(self.p.data_i, i) + + def __iter__(self): + yield from self.p # yields ready/valid/data (data also gets yielded) + yield from self.n # ditto + + def ports(self): + return list(self) + + def elaborate(self, platform): + """ handles case where stage has dynamic ready/valid functions + """ + m = Module() + m.submodules.p = self.p + m.submodules.n = self.n + + self.setup(m, self.p.data_i) + + if not self.p.stage_ctl: + return m + + # intercept the previous (outgoing) "ready", combine with stage ready + m.d.comb += self.p.s_ready_o.eq(self.p._ready_o & self.stage.d_ready) + + # intercept the next (incoming) "ready" and combine it with data valid + sdv = self.stage.d_valid(self.n.ready_i) + m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv) + + return m + + +class BufferedHandshake(ControlBase): + """ buffered pipeline stage. data and strobe signals travel in sync. 
+ if ever the input is ready and the output is not, processed data + is shunted in a temporary register. + + Argument: stage. see Stage API above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + process --->----^ + | | + +-- r_data ->-+ + + input data p.data_i is read (only), is processed and goes into an + intermediate result store [process()]. this is updated combinatorially. + + in a non-stall condition, the intermediate result will go into the + output (update_output). however if ever there is a stall, it goes + into r_data instead [update_buffer()]. + + when the non-stall condition is released, r_data is the first + to be transferred to the output [flush_buffer()], and the stall + condition cleared. + + on the next cycle (as long as stall is not raised again) the + input may begin to be processed and transferred directly to output. + """ + + def elaborate(self, platform): + self.m = ControlBase.elaborate(self, platform) + + result = _spec(self.stage.ospec, "r_tmp") + r_data = _spec(self.stage.ospec, "r_data") + + # establish some combinatorial temporaries + o_n_validn = Signal(reset_less=True) + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + nir_por = Signal(reset_less=True) + nir_por_n = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) + nir_novn = Signal(reset_less=True) + nirn_novn = Signal(reset_less=True) + por_pivn = Signal(reset_less=True) + npnn = Signal(reset_less=True) + self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test), + o_n_validn.eq(~self.n.valid_o), + n_ready_i.eq(self.n.ready_i_test), + nir_por.eq(n_ready_i & self.p._ready_o), + nir_por_n.eq(n_ready_i & ~self.p._ready_o), + nir_novn.eq(n_ready_i | o_n_validn), + nirn_novn.eq(~n_ready_i & o_n_validn), + npnn.eq(nir_por | nirn_novn), + por_pivn.eq(self.p._ready_o & ~p_valid_i) + ] + + # store result of processing in combinatorial temporary + self.m.d.comb += nmoperator.eq(result, self.data_r) + + # if 
not in stall condition, update the temporary register + with self.m.If(self.p.ready_o): # not stalled + self.m.d.sync += nmoperator.eq(r_data, result) # update buffer + + # data pass-through conditions + with self.m.If(npnn): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid + nmoperator.eq(self.n.data_o, data_o), # update out + ] + # buffer flush conditions (NOTE: can override data passthru conditions) + with self.m.If(nir_por_n): # not stalled + # Flush the [already processed] buffer to the output port. + data_o = self._postprocess(r_data) # XXX TBD, does nothing right now + self.m.d.sync += [self.n.valid_o.eq(1), # reg empty + nmoperator.eq(self.n.data_o, data_o), # flush + ] + # output ready conditions + self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn) + + return self.m + + +class SimpleHandshake(ControlBase): + """ simple handshake control. data and strobe signals travel in sync. + implements the protocol used by Wishbone and AXI4. + + Argument: stage. 
see Stage API above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + +--process->--^ + Truth Table + + Inputs Temporary Output Data + ------- ---------- ----- ---- + P P N N PiV& ~NiR& N P + i o i o PoR NoV o o + V R R V V R + + ------- - - - - + 0 0 0 0 0 0 >0 0 reg + 0 0 0 1 0 1 >1 0 reg + 0 0 1 0 0 0 0 1 process(data_i) + 0 0 1 1 0 0 0 1 process(data_i) + ------- - - - - + 0 1 0 0 0 0 >0 0 reg + 0 1 0 1 0 1 >1 0 reg + 0 1 1 0 0 0 0 1 process(data_i) + 0 1 1 1 0 0 0 1 process(data_i) + ------- - - - - + 1 0 0 0 0 0 >0 0 reg + 1 0 0 1 0 1 >1 0 reg + 1 0 1 0 0 0 0 1 process(data_i) + 1 0 1 1 0 0 0 1 process(data_i) + ------- - - - - + 1 1 0 0 1 0 1 0 process(data_i) + 1 1 0 1 1 1 1 0 process(data_i) + 1 1 1 0 1 0 1 1 process(data_i) + 1 1 1 1 1 0 1 1 process(data_i) + ------- - - - - + """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + r_busy = Signal() + result = _spec(self.stage.ospec, "r_tmp") + + # establish some combinatorial temporaries + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + p_valid_i_p_ready_o = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) + m.d.comb += [p_valid_i.eq(self.p.valid_i_test), + n_ready_i.eq(self.n.ready_i_test), + p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o), + ] + + # store result of processing in combinatorial temporary + m.d.comb += nmoperator.eq(result, self.data_r) + + # previous valid and ready + with m.If(p_valid_i_p_ready_o): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += [r_busy.eq(1), # output valid + nmoperator.eq(self.n.data_o, data_o), # update output + ] + # previous invalid or not ready, however next is accepting + with m.Elif(n_ready_i): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += [nmoperator.eq(self.n.data_o, data_o)] + # TODO: could still send data here (if there was any) + #m.d.sync += 
self.n.valid_o.eq(0) # ...so set output invalid + m.d.sync += r_busy.eq(0) # ...so set output invalid + + m.d.comb += self.n.valid_o.eq(r_busy) + # if next is ready, so is previous + m.d.comb += self.p._ready_o.eq(n_ready_i) + + return self.m + + +class UnbufferedPipeline(ControlBase): + """ A simple pipeline stage with single-clock synchronisation + and two-way valid/ready synchronised signalling. + + Note that a stall in one stage will result in the entire pipeline + chain stalling. + + Also that unlike BufferedHandshake, the valid/ready signalling does NOT + travel synchronously with the data: the valid/ready signalling + combines in a *combinatorial* fashion. Therefore, a long pipeline + chain will lengthen propagation delays. + + Argument: stage. see Stage API, above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | + r_data result + | | + +--process ->-+ + + Attributes: + ----------- + p.data_i : StageInput, shaped according to ispec + The pipeline input + p.data_o : StageOutput, shaped according to ospec + The pipeline output + r_data : input_shape according to ispec + A temporary (buffered) copy of a prior (valid) input. + This is HELD if the output is not ready. It is updated + SYNCHRONOUSLY. + result: output_shape according to ospec + The output of the combinatorial logic. it is updated + COMBINATORIALLY (no clock dependence). 
+ + Truth Table + + Inputs Temp Output Data + ------- - ----- ---- + P P N N ~NiR& N P + i o i o NoV o o + V R R V V R + + ------- - - - + 0 0 0 0 0 0 1 reg + 0 0 0 1 1 1 0 reg + 0 0 1 0 0 0 1 reg + 0 0 1 1 0 0 1 reg + ------- - - - + 0 1 0 0 0 0 1 reg + 0 1 0 1 1 1 0 reg + 0 1 1 0 0 0 1 reg + 0 1 1 1 0 0 1 reg + ------- - - - + 1 0 0 0 0 1 1 reg + 1 0 0 1 1 1 0 reg + 1 0 1 0 0 1 1 reg + 1 0 1 1 0 1 1 reg + ------- - - - + 1 1 0 0 0 1 1 process(data_i) + 1 1 0 1 1 1 0 process(data_i) + 1 1 1 0 0 1 1 process(data_i) + 1 1 1 1 0 1 1 process(data_i) + ------- - - - + + Note: PoR is *NOT* involved in the above decision-making. + """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + data_valid = Signal() # is data valid or not + r_data = _spec(self.stage.ospec, "r_tmp") # output type + + # some temporaries + p_valid_i = Signal(reset_less=True) + pv = Signal(reset_less=True) + buf_full = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o) + m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid) + + m.d.comb += self.n.valid_o.eq(data_valid) + m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test) + m.d.sync += data_valid.eq(p_valid_i | buf_full) + + with m.If(pv): + m.d.sync += nmoperator.eq(r_data, self.data_r) + data_o = self._postprocess(r_data) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, data_o) + + return self.m + +class UnbufferedPipeline2(ControlBase): + """ A simple pipeline stage with single-clock synchronisation + and two-way valid/ready synchronised signalling. + + Note that a stall in one stage will result in the entire pipeline + chain stalling. + + Also that unlike BufferedHandshake, the valid/ready signalling does NOT + travel synchronously with the data: the valid/ready signalling + combines in a *combinatorial* fashion. Therefore, a long pipeline + chain will lengthen propagation delays. 
+ + Argument: stage. see Stage API, above + + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 + | | | + +- process-> buf <-+ + Attributes: + ----------- + p.data_i : StageInput, shaped according to ispec + The pipeline input + p.data_o : StageOutput, shaped according to ospec + The pipeline output + buf : output_shape according to ospec + A temporary (buffered) copy of a valid output + This is HELD if the output is not ready. It is updated + SYNCHRONOUSLY. + + Inputs Temp Output Data + ------- - ----- + P P N N ~NiR& N P (buf_full) + i o i o NoV o o + V R R V V R + + ------- - - - + 0 0 0 0 0 0 1 process(data_i) + 0 0 0 1 1 1 0 reg (odata, unchanged) + 0 0 1 0 0 0 1 process(data_i) + 0 0 1 1 0 0 1 process(data_i) + ------- - - - + 0 1 0 0 0 0 1 process(data_i) + 0 1 0 1 1 1 0 reg (odata, unchanged) + 0 1 1 0 0 0 1 process(data_i) + 0 1 1 1 0 0 1 process(data_i) + ------- - - - + 1 0 0 0 0 1 1 process(data_i) + 1 0 0 1 1 1 0 reg (odata, unchanged) + 1 0 1 0 0 1 1 process(data_i) + 1 0 1 1 0 1 1 process(data_i) + ------- - - - + 1 1 0 0 0 1 1 process(data_i) + 1 1 0 1 1 1 0 reg (odata, unchanged) + 1 1 1 0 0 1 1 process(data_i) + 1 1 1 1 0 1 1 process(data_i) + ------- - - - + + Note: PoR is *NOT* involved in the above decision-making. 
+ """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + buf_full = Signal() # is data valid or not + buf = _spec(self.stage.ospec, "r_tmp") # output type + + # some temporaries + p_valid_i = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + + m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i) + m.d.comb += self.p._ready_o.eq(~buf_full) + m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o) + + data_o = Mux(buf_full, buf, self.data_r) + data_o = self._postprocess(data_o) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, data_o) + m.d.sync += nmoperator.eq(buf, self.n.data_o) + + return self.m + + +class PassThroughHandshake(ControlBase): + """ A control block that delays by one clock cycle. + + Inputs Temporary Output Data + ------- ------------------ ----- ---- + P P N N PiV& PiV| NiR| pvr N P (pvr) + i o i o PoR ~PoR ~NoV o o + V R R V V R + + ------- - - - - - - + 0 0 0 0 0 1 1 0 1 1 odata (unchanged) + 0 0 0 1 0 1 0 0 1 0 odata (unchanged) + 0 0 1 0 0 1 1 0 1 1 odata (unchanged) + 0 0 1 1 0 1 1 0 1 1 odata (unchanged) + ------- - - - - - - + 0 1 0 0 0 0 1 0 0 1 odata (unchanged) + 0 1 0 1 0 0 0 0 0 0 odata (unchanged) + 0 1 1 0 0 0 1 0 0 1 odata (unchanged) + 0 1 1 1 0 0 1 0 0 1 odata (unchanged) + ------- - - - - - - + 1 0 0 0 0 1 1 1 1 1 process(in) + 1 0 0 1 0 1 0 0 1 0 odata (unchanged) + 1 0 1 0 0 1 1 1 1 1 process(in) + 1 0 1 1 0 1 1 1 1 1 process(in) + ------- - - - - - - + 1 1 0 0 1 1 1 1 1 1 process(in) + 1 1 0 1 1 1 0 0 1 0 odata (unchanged) + 1 1 1 0 1 1 1 1 1 1 process(in) + 1 1 1 1 1 1 1 1 1 1 process(in) + ------- - - - - - - + + """ + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + r_data = _spec(self.stage.ospec, "r_tmp") # output type + + # temporaries + p_valid_i = Signal(reset_less=True) + pvr = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + m.d.comb += pvr.eq(p_valid_i & 
self.p.ready_o) + + m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test) + m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o) + + odata = Mux(pvr, self.data_r, r_data) + m.d.sync += nmoperator.eq(r_data, odata) + r_data = self._postprocess(r_data) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, r_data) + + return m + + +class RegisterPipeline(UnbufferedPipeline): + """ A pipeline stage that delays by one clock cycle, creating a + sync'd latch out of data_o and valid_o as an indirect byproduct + of using PassThroughStage + """ + def __init__(self, iospecfn): + UnbufferedPipeline.__init__(self, PassThroughStage(iospecfn)) + + +class FIFOControl(ControlBase): + """ FIFO Control. Uses Queue to store data, coincidentally + happens to have same valid/ready signalling as Stage API. + + data_i -> fifo.din -> FIFO -> fifo.dout -> data_o + """ + def __init__(self, depth, stage, in_multi=None, stage_ctl=False, + fwft=True, pipe=False): + """ FIFO Control + + * :depth: number of entries in the FIFO + * :stage: data processing block + * :fwft: first word fall-thru mode (non-fwft introduces delay) + * :pipe: specifies pipe mode. + + when fwft = True it indicates that transfers may occur + combinatorially through stage processing in the same clock cycle. + This requires that the Stage be a Moore FSM: + https://en.wikipedia.org/wiki/Moore_machine + + when fwft = False it indicates that all output signals are + produced only from internal registers or memory, i.e. that the + Stage is a Mealy FSM: + https://en.wikipedia.org/wiki/Mealy_machine + + data is processed (and located) as follows: + + self.p self.stage temp fn temp fn temp fp self.n + data_i->process()->result->cat->din.FIFO.dout->cat(data_o) + + yes, really: cat produces a Cat() which can be assigned to. 
+ this is how the FIFO gets de-catted without needing a de-cat + function + """ + self.fwft = fwft + self.pipe = pipe + self.fdepth = depth + ControlBase.__init__(self, stage, in_multi, stage_ctl) + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + # make a FIFO with a signal of equal width to the data_o. + (fwidth, _) = nmoperator.shape(self.n.data_o) + fifo = Queue(fwidth, self.fdepth, fwft=self.fwft, pipe=self.pipe) + m.submodules.fifo = fifo + + def processfn(data_i): + # store result of processing in combinatorial temporary + result = _spec(self.stage.ospec, "r_temp") + m.d.comb += nmoperator.eq(result, self.process(data_i)) + return nmoperator.cat(result) + + ## prev: make the FIFO (Queue object) "look" like a PrevControl... + m.submodules.fp = fp = PrevControl() + fp.valid_i, fp._ready_o, fp.data_i = fifo.we, fifo.writable, fifo.din + m.d.comb += fp._connect_in(self.p, fn=processfn) + + # next: make the FIFO (Queue object) "look" like a NextControl... + m.submodules.fn = fn = NextControl() + fn.valid_o, fn.ready_i, fn.data_o = fifo.readable, fifo.re, fifo.dout + connections = fn._connect_out(self.n, fn=nmoperator.cat) + + # ok ok so we can't just do the ready/valid eqs straight: + # first 2 from connections are the ready/valid, 3rd is data. + if self.fwft: + m.d.comb += connections[:2] # combinatorial on next ready/valid + else: + m.d.sync += connections[:2] # non-fwft mode needs sync + data_o = connections[2] # get the data + data_o = self._postprocess(data_o) # XXX TBD, does nothing right now + m.d.comb += data_o + + return m + + +# aka "RegStage". 
+class UnbufferedPipeline(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl, + fwft=True, pipe=False) + +# aka "BreakReadyStage" XXX had to set fwft=True to get it to work +class PassThroughHandshake(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl, + fwft=True, pipe=True) + +# this is *probably* BufferedHandshake, although test #997 now succeeds. +class BufferedHandshake(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 2, stage, in_multi, stage_ctl, + fwft=True, pipe=False) + + +""" +# this is *probably* SimpleHandshake (note: memory cell size=0) +class SimpleHandshake(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl, + fwft=True, pipe=False) +""" diff --git a/src/ieee754/add/stageapi.py b/src/ieee754/add/stageapi.py new file mode 100644 index 00000000..9651bf79 --- /dev/null +++ b/src/ieee754/add/stageapi.py @@ -0,0 +1,271 @@ +""" Stage API + + Associated development bugs: + * http://bugs.libre-riscv.org/show_bug.cgi?id=64 + * http://bugs.libre-riscv.org/show_bug.cgi?id=57 + + Stage API: + --------- + + stage requires compliance with a strict API that may be + implemented in several means, including as a static class. + + Stages do not HOLD data, and they definitely do not contain + signalling (ready/valid). They do however specify the FORMAT + of the incoming and outgoing data, and they provide a means to + PROCESS that data (from incoming format to outgoing format). + + Stage Blocks really should be combinatorial blocks (Moore FSMs). 
+ It would be ok to have input come in from sync'd sources + (clock-driven, Mealy FSMs) however by doing so they would no longer + be deterministic, and chaining such blocks with such side-effects + together could result in unexpected, unpredictable, unreproduceable + behaviour. + + So generally to be avoided, then unless you know what you are doing. + https://en.wikipedia.org/wiki/Moore_machine + https://en.wikipedia.org/wiki/Mealy_machine + + the methods of a stage instance must be as follows: + + * ispec() - Input data format specification. Takes a bit of explaining. + The requirements are: something that eventually derives from + nmigen Value must be returned *OR* an iterator or iterable + or sequence (list, tuple etc.) or generator must *yield* + thing(s) that (eventually) derive from the nmigen Value class. + + Complex to state, very simple in practice: + see test_buf_pipe.py for over 25 worked examples. + + * ospec() - Output data format specification. + format requirements identical to ispec. + + * process(m, i) - Optional function for processing ispec-formatted data. + returns a combinatorial block of a result that + may be assigned to the output, by way of the "nmoperator.eq" + function. Note that what is returned here can be + extremely flexible. Even a dictionary can be returned + as long as it has fields that match precisely with the + Record into which its values is intended to be assigned. + Again: see example unit tests for details. + + * setup(m, i) - Optional function for setting up submodules. + may be used for more complex stages, to link + the input (i) to submodules. must take responsibility + for adding those submodules to the module (m). + the submodules must be combinatorial blocks and + must have their inputs and output linked combinatorially. 
+ + Both StageCls (for use with non-static classes) and Stage (for use + by static classes) are abstract classes from which, for convenience + and as a courtesy to other developers, anything conforming to the + Stage API may *choose* to derive. See Liskov Substitution Principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + StageChain: + ---------- + + A useful combinatorial wrapper around stages that chains them together + and then presents a Stage-API-conformant interface. By presenting + the same API as the stages it wraps, it can clearly be used recursively. + + StageHelper: + ---------- + + A convenience wrapper around a Stage-API-compliant "thing" which + complies with the Stage API and provides mandatory versions of + all the optional bits. +""" + +from abc import ABCMeta, abstractmethod +import inspect + +import nmoperator + + +def _spec(fn, name=None): + """ useful function that determines if "fn" has an argument "name". + if so, fn(name) is called otherwise fn() is called. + + means that ispec and ospec can be declared with *or without* + a name argument. normally it would be necessary to have + "ispec(name=None)" to achieve the same effect. + """ + if name is None: + return fn() + varnames = dict(inspect.getmembers(fn.__code__))['co_varnames'] + if 'name' in varnames: + return fn(name=name) + return fn() + + +class StageCls(metaclass=ABCMeta): + """ Class-based "Stage" API. requires instantiation (after derivation) + + see "Stage API" above.. Note: python does *not* require derivation + from this class. All that is required is that the pipelines *have* + the functions listed in this class. Derivation from this class + is therefore merely a "courtesy" to maintainers. 
+ """ + @abstractmethod + def ispec(self): pass # REQUIRED + @abstractmethod + def ospec(self): pass # REQUIRED + #@abstractmethod + #def setup(self, m, i): pass # OPTIONAL + #@abstractmethod + #def process(self, i): pass # OPTIONAL + + +class Stage(metaclass=ABCMeta): + """ Static "Stage" API. does not require instantiation (after derivation) + + see "Stage API" above. Note: python does *not* require derivation + from this class. All that is required is that the pipelines *have* + the functions listed in this class. Derivation from this class + is therefore merely a "courtesy" to maintainers. + """ + @staticmethod + @abstractmethod + def ispec(): pass + + @staticmethod + @abstractmethod + def ospec(): pass + + #@staticmethod + #@abstractmethod + #def setup(m, i): pass + + #@staticmethod + #@abstractmethod + #def process(i): pass + + +class StageHelper(Stage): + """ a convenience wrapper around something that is Stage-API-compliant. + (that "something" may be a static class, for example). 
+ + StageHelper happens to also be compliant with the Stage API, + it differs from the stage that it wraps in that all the "optional" + functions are provided (hence the designation "convenience wrapper") + """ + def __init__(self, stage): + self.stage = stage + self._ispecfn = None + self._ospecfn = None + if stage is not None: + self.set_specs(self, self) + + def ospec(self, name): + assert self._ospecfn is not None + return _spec(self._ospecfn, name) + + def ispec(self, name): + assert self._ispecfn is not None + return _spec(self._ispecfn, name) + + def set_specs(self, p, n): + """ sets up the ispecfn and ospecfn for getting input and output data + """ + if hasattr(p, "stage"): + p = p.stage + if hasattr(n, "stage"): + n = n.stage + self._ispecfn = p.ispec + self._ospecfn = n.ospec + + def new_specs(self, name): + """ allocates new ispec and ospec pair + """ + return (_spec(self.ispec, "%s_i" % name), + _spec(self.ospec, "%s_o" % name)) + + def process(self, i): + if self.stage and hasattr(self.stage, "process"): + return self.stage.process(i) + return i + + def setup(self, m, i): + if self.stage is not None and hasattr(self.stage, "setup"): + self.stage.setup(m, i) + + def _postprocess(self, i): # XXX DISABLED + return i # RETURNS INPUT + if hasattr(self.stage, "postprocess"): + return self.stage.postprocess(i) + return i + + +class StageChain(StageHelper): + """ pass in a list of stages, and they will automatically be + chained together via their input and output specs into a + combinatorial chain, to create one giant combinatorial block. + + the end result basically conforms to the exact same Stage API. + + * input to this class will be the input of the first stage + * output of first stage goes into input of second + * output of second goes into input into third + * ... (etc. etc.) 
+ * the output of this class will be the output of the last stage + + NOTE: whilst this is very similar to ControlBase.connect(), it is + *really* important to appreciate that StageChain is pure + combinatorial and bypasses (does not involve, at all, ready/valid + signalling of any kind). + + ControlBase.connect on the other hand respects, connects, and uses + ready/valid signalling. + + Arguments: + + * :chain: a chain of combinatorial blocks conforming to the Stage API + NOTE: StageChain.ispec and ospect have to have something + to return (beginning and end specs of the chain), + therefore the chain argument must be non-zero length + + * :specallocate: if set, new input and output data will be allocated + and connected (eq'd) to each chained Stage. + in some cases if this is not done, the nmigen warning + "driving from two sources, module is being flattened" + will be issued. + + NOTE: do NOT use StageChain with combinatorial blocks that have + side-effects (state-based / clock-based input) or conditional + (inter-chain) dependencies, unless you really know what you are doing. 
+ """ + def __init__(self, chain, specallocate=False): + assert len(chain) > 0, "stage chain must be non-zero length" + self.chain = chain + StageHelper.__init__(self, None) + self.setup = self._sa_setup if specallocate else self._na_setup + self.set_specs(self.chain[0], self.chain[-1]) + + def _sa_setup(self, m, i): + for (idx, c) in enumerate(self.chain): + if hasattr(c, "setup"): + c.setup(m, i) # stage may have some module stuff + ofn = self.chain[idx].ospec # last assignment survives + o = _spec(ofn, 'chainin%d' % idx) + m.d.comb += nmoperator.eq(o, c.process(i)) # process input into "o" + if idx == len(self.chain)-1: + break + ifn = self.chain[idx+1].ispec # new input on next loop + i = _spec(ifn, 'chainin%d' % (idx+1)) + m.d.comb += nmoperator.eq(i, o) # assign to next input + self.o = o + return self.o # last loop is the output + + def _na_setup(self, m, i): + for (idx, c) in enumerate(self.chain): + if hasattr(c, "setup"): + c.setup(m, i) # stage may have some module stuff + i = o = c.process(i) # store input into "o" + self.o = o + return self.o # last loop is the output + + def process(self, i): + return self.o # conform to Stage API: return last-loop output + + diff --git a/src/ieee754/add/test_add.py b/src/ieee754/add/test_add.py new file mode 100644 index 00000000..989cf482 --- /dev/null +++ b/src/ieee754/add/test_add.py @@ -0,0 +1,78 @@ +from operator import add + +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation + +from nmigen_add_experiment import FPADD + +from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan, + is_inf, is_pos_inf, is_neg_inf, + match, get_rs_case, check_rs_case, run_test, + run_edge_cases, run_corner_cases) + +def testbench(dut): + yield from check_rs_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1) + yield from check_rs_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002) + yield from check_rs_case(dut, 0x00000047, 0x80000048, 0x80000001) + yield from check_rs_case(dut, 0x000116C2, 
0x8001170A, 0x80000048) + yield from check_rs_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33) + yield from check_rs_case(dut, 0, 0, 0) + yield from check_rs_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000) + yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) + #yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) + yield from check_rs_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000) + yield from check_rs_case(dut, 0x42540000, 0xC2540000, 0x00000000) + yield from check_rs_case(dut, 0xC2540000, 0x42540000, 0x00000000) + yield from check_rs_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000) + yield from check_rs_case(dut, 0x82471f51, 0x243985f, 0x801c3790) + yield from check_rs_case(dut, 0x40000000, 0xc0000000, 0x00000000) + yield from check_rs_case(dut, 0x3F800000, 0x40000000, 0x40400000) + yield from check_rs_case(dut, 0x40000000, 0x3F800000, 0x40400000) + yield from check_rs_case(dut, 0x447A0000, 0x4488B000, 0x4502D800) + yield from check_rs_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E) + yield from check_rs_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E) + yield from check_rs_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6) + yield from check_rs_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6) + yield from check_rs_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6) + yield from check_rs_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6) + yield from check_rs_case(dut, 0x7F800000, 0x00000000, 0x7F800000) + yield from check_rs_case(dut, 0x00000000, 0x7F800000, 0x7F800000) + yield from check_rs_case(dut, 0xFF800000, 0x00000000, 0xFF800000) + yield from check_rs_case(dut, 0x00000000, 0xFF800000, 0xFF800000) + yield from check_rs_case(dut, 0x7F800000, 0x7F800000, 0x7F800000) + yield from check_rs_case(dut, 0xFF800000, 0xFF800000, 0xFF800000) + yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) + yield from check_rs_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7) + yield from check_rs_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E) + yield from 
check_rs_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE) + yield from check_rs_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE) + yield from check_rs_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE) + yield from check_rs_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD) + yield from check_rs_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF) + yield from check_rs_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358) + yield from check_rs_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358) + yield from check_rs_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5) + yield from check_rs_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5) + #yield from check_rs_case(dut, 1, 0, 1) + #yield from check_rs_case(dut, 1, 1, 1) + + count = 0 + + #regression tests + stimulus_a = [0x80000000, 0x22cb525a, 0x40000000, 0x83e73d5c, + 0xbf9b1e94, 0x34082401, + 0x5e8ef81, 0x5c75da81, 0x2b017] + stimulus_b = [0xff800001, 0xadd79efa, 0xC0000000, 0x1c800000, + 0xc038ed3a, 0xb328cd45, + 0x114f3db, 0x2f642a39, 0xff3807ab] + yield from run_test(dut, stimulus_a, stimulus_b, add, get_rs_case) + count += len(stimulus_a) + print (count, "vectors passed") + + yield from run_corner_cases(dut, count, add, get_rs_case) + yield from run_edge_cases(dut, count, add, get_rs_case) + +if __name__ == '__main__': + dut = FPADD(width=32, id_wid=5, single_cycle=True) + run_simulation(dut, testbench(dut), vcd_name="test_add.vcd") + diff --git a/src/ieee754/add/test_add16.py b/src/ieee754/add/test_add16.py new file mode 100644 index 00000000..f39ae8ae --- /dev/null +++ b/src/ieee754/add/test_add16.py @@ -0,0 +1,44 @@ +from operator import add + +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation + +from nmigen_add_experiment import FPADD + +from unit_test_half import (get_mantissa, get_exponent, get_sign, is_nan, + is_inf, is_pos_inf, is_neg_inf, + match, get_case, check_case, run_test, + run_edge_cases, run_corner_cases) + +def testbench(dut): + #yield from check_case(dut, 0x7800, 0xff6f, 0xff6f) + #yield from 
check_case(dut, 0x0000, 0x7c32, 0x7e32) + #yield from check_case(dut, 0x0000, 0x7da9, 0x7fa9) + #yield from check_case(dut, 0x0000, 0x7ea0, 0x7ea0) + #yield from check_case(dut, 0x7c9a, 0x8000, 0x7e9a) + #yield from check_case(dut, 0x7d5e, 0x0000, 0x7f5e) + #yield from check_case(dut, 0x8000, 0x7c8c, 0x7e8c) + #yield from check_case(dut, 0x8000, 0xfc55, 0xfe55) + #yield from check_case(dut, 0x8000, 0x7e1a, 0x7e1a) + + #yield from check_case(dut, 0x8000, 0xfc01, 0x7e00) + yield from check_case(dut, 0xfc00, 0x7c00, 0x7e00) + yield from check_case(dut, 0x8000, 0, 0) + yield from check_case(dut, 0, 0, 0) + + count = 0 + + #regression tests + stimulus_a = [ 0x8000, 0x8000 ] + stimulus_b = [ 0x0000, 0xfc01 ] + yield from run_test(dut, stimulus_a, stimulus_b, add) + count += len(stimulus_a) + print (count, "vectors passed") + + yield from run_corner_cases(dut, count, add) + yield from run_edge_cases(dut, count, add) + +if __name__ == '__main__': + dut = FPADD(width=16, single_cycle=True) + run_simulation(dut, testbench(dut), vcd_name="test_add16.vcd") + diff --git a/src/ieee754/add/test_add64.py b/src/ieee754/add/test_add64.py new file mode 100644 index 00000000..dcca12c6 --- /dev/null +++ b/src/ieee754/add/test_add64.py @@ -0,0 +1,45 @@ +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation +from operator import add + +from nmigen_add_experiment import FPADD + +import sys +import atexit +from random import randint +from random import seed + +from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan, + is_inf, is_pos_inf, is_neg_inf, + match, get_case, check_case, run_test, + run_edge_cases, run_corner_cases) + + +def testbench(dut): + yield from check_case(dut, 0, 0, 0) + yield from check_case(dut, 0x3FF0000000000000, 0x4000000000000000, + 0x4008000000000000) + yield from check_case(dut, 0x4000000000000000, 0x3FF0000000000000, + 0x4008000000000000) + yield from check_case(dut, 0x4056C00000000000, 0x4042800000000000, + 
0x4060000000000000) + yield from check_case(dut, 0x4056C00000000000, 0x4042EA3D70A3D70A, + 0x40601A8F5C28F5C2) + + count = 0 + + #regression tests + stimulus_a = [0x3ff00000000000c5, 0xff80000000000000] + stimulus_b = [0xbd28a404211fb72b, 0x7f80000000000000] + yield from run_test(dut, stimulus_a, stimulus_b, add) + count += len(stimulus_a) + print (count, "vectors passed") + + yield from run_corner_cases(dut, count, add) + yield from run_edge_cases(dut, count, add) + + +if __name__ == '__main__': + dut = FPADD(width=64, single_cycle=False) + run_simulation(dut, testbench(dut), vcd_name="test_add64.vcd") + diff --git a/src/ieee754/add/test_add_base.py b/src/ieee754/add/test_add_base.py new file mode 100644 index 00000000..248f719a --- /dev/null +++ b/src/ieee754/add/test_add_base.py @@ -0,0 +1,94 @@ +from random import randint +from operator import add + +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation + +from nmigen_add_experiment import FPADDBase, FPADDBaseMod + +def get_case(dut, a, b, mid): + yield dut.in_mid.eq(mid) + yield dut.in_a.eq(a) + yield dut.in_b.eq(b) + yield dut.in_t.stb.eq(1) + yield + yield + yield + yield + ack = (yield dut.in_t.ack) + assert ack == 0 + + yield dut.in_t.stb.eq(0) + + yield dut.out_z.ack.eq(1) + + while True: + out_z_stb = (yield dut.out_z.stb) + if not out_z_stb: + yield + continue + out_z = yield dut.out_z.v + out_mid = yield dut.out_mid + yield dut.out_z.ack.eq(0) + yield + break + + return out_z, out_mid + +def check_case(dut, a, b, z, mid=None): + if mid is None: + mid = randint(0, 6) + out_z, out_mid = yield from get_case(dut, a, b, mid) + assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) + assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid) + + + +def testbench(dut): + yield from check_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1) + yield from check_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002) + yield from check_case(dut, 0x00000047, 0x80000048, 
0x80000001) + yield from check_case(dut, 0x000116C2, 0x8001170A, 0x80000048) + yield from check_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33) + yield from check_case(dut, 0, 0, 0) + yield from check_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000) + yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) + #yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) + yield from check_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000) + yield from check_case(dut, 0x42540000, 0xC2540000, 0x00000000) + yield from check_case(dut, 0xC2540000, 0x42540000, 0x00000000) + yield from check_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000) + yield from check_case(dut, 0x82471f51, 0x243985f, 0x801c3790) + yield from check_case(dut, 0x40000000, 0xc0000000, 0x00000000) + yield from check_case(dut, 0x3F800000, 0x40000000, 0x40400000) + yield from check_case(dut, 0x40000000, 0x3F800000, 0x40400000) + yield from check_case(dut, 0x447A0000, 0x4488B000, 0x4502D800) + yield from check_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E) + yield from check_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E) + yield from check_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6) + yield from check_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6) + yield from check_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6) + yield from check_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6) + yield from check_case(dut, 0x7F800000, 0x00000000, 0x7F800000) + yield from check_case(dut, 0x00000000, 0x7F800000, 0x7F800000) + yield from check_case(dut, 0xFF800000, 0x00000000, 0xFF800000) + yield from check_case(dut, 0x00000000, 0xFF800000, 0xFF800000) + yield from check_case(dut, 0x7F800000, 0x7F800000, 0x7F800000) + yield from check_case(dut, 0xFF800000, 0xFF800000, 0xFF800000) + yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000) + yield from check_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7) + yield from check_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E) + yield from check_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE) 
+ yield from check_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE) + yield from check_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE) + yield from check_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD) + yield from check_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF) + yield from check_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358) + yield from check_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358) + yield from check_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5) + yield from check_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5) + +if __name__ == '__main__': + dut = FPADDBaseMod(width=32, id_wid=5, single_cycle=True) + run_simulation(dut, testbench(dut), vcd_name="test_add.vcd") + diff --git a/src/ieee754/add/test_buf_pipe.py b/src/ieee754/add/test_buf_pipe.py new file mode 100644 index 00000000..37f2b31f --- /dev/null +++ b/src/ieee754/add/test_buf_pipe.py @@ -0,0 +1,1308 @@ +""" Unit tests for Buffered and Unbuffered pipelines + + contains useful worked examples of how to use the Pipeline API, + including: + + * Combinatorial Stage "Chaining" + * class-based data stages + * nmigen module-based data stages + * special nmigen module-based data stage, where the stage *is* the module + * Record-based data stages + * static-class data stages + * multi-stage pipelines (and how to connect them) + * how to *use* the pipelines (see Test5) - how to get data in and out + +""" + +from nmigen import Module, Signal, Mux, Const, Elaboratable +from nmigen.hdl.rec import Record +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil + +from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd +from example_buf_pipe import ExamplePipeline, UnbufferedPipeline +from example_buf_pipe import ExampleStageCls +from example_buf_pipe import PrevControl, NextControl, BufferedHandshake +from example_buf_pipe import StageChain, ControlBase, StageCls +from singlepipe import UnbufferedPipeline2 +from singlepipe import SimpleHandshake +from singlepipe import 
PassThroughHandshake +from singlepipe import PassThroughStage +from singlepipe import FIFOControl +from singlepipe import RecordObject + +from random import randint, seed + +#seed(4) + + +def check_o_n_valid(dut, val): + o_n_valid = yield dut.n.valid_o + assert o_n_valid == val + +def check_o_n_valid2(dut, val): + o_n_valid = yield dut.n.valid_o + assert o_n_valid == val + + +def tbench(dut): + #yield dut.i_p_rst.eq(1) + yield dut.n.ready_i.eq(0) + #yield dut.p.ready_o.eq(0) + yield + yield + #yield dut.i_p_rst.eq(0) + yield dut.n.ready_i.eq(1) + yield dut.p.data_i.eq(5) + yield dut.p.valid_i.eq(1) + yield + + yield dut.p.data_i.eq(7) + yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed + yield + yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt + + yield dut.p.data_i.eq(2) + yield + yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready) + yield dut.p.data_i.eq(9) + yield + yield dut.p.valid_i.eq(0) + yield dut.p.data_i.eq(12) + yield + yield dut.p.data_i.eq(32) + yield dut.n.ready_i.eq(1) + yield + yield from check_o_n_valid(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done. 
+ yield + + +def tbench2(dut): + #yield dut.p.i_rst.eq(1) + yield dut.n.ready_i.eq(0) + #yield dut.p.ready_o.eq(0) + yield + yield + #yield dut.p.i_rst.eq(0) + yield dut.n.ready_i.eq(1) + yield dut.p.data_i.eq(5) + yield dut.p.valid_i.eq(1) + yield + + yield dut.p.data_i.eq(7) + yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks + yield + yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks + + yield dut.p.data_i.eq(2) + yield + yield from check_o_n_valid2(dut, 1) # ok *now* i_p_valid effect is felt + yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready) + yield dut.p.data_i.eq(9) + yield + yield dut.p.valid_i.eq(0) + yield dut.p.data_i.eq(12) + yield + yield dut.p.data_i.eq(32) + yield dut.n.ready_i.eq(1) + yield + yield from check_o_n_valid2(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid2(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid2(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid2(dut, 0) # buffer outputted, *now* we're done. 
+ yield + yield + yield + + +class Test3: + def __init__(self, dut, resultfn): + self.dut = dut + self.resultfn = resultfn + self.data = [] + for i in range(num_tests): + #data.append(randint(0, 1<<16-1)) + self.data.append(i+1) + self.i = 0 + self.o = 0 + + def send(self): + while self.o != len(self.data): + send_range = randint(0, 3) + for j in range(randint(1,10)): + if send_range == 0: + send = True + else: + send = randint(0, send_range) != 0 + o_p_ready = yield self.dut.p.ready_o + if not o_p_ready: + yield + continue + if send and self.i != len(self.data): + yield self.dut.p.valid_i.eq(1) + yield self.dut.p.data_i.eq(self.data[self.i]) + self.i += 1 + else: + yield self.dut.p.valid_i.eq(0) + yield + + def rcv(self): + while self.o != len(self.data): + stall_range = randint(0, 3) + for j in range(randint(1,10)): + stall = randint(0, stall_range) != 0 + yield self.dut.n.ready_i.eq(stall) + yield + o_n_valid = yield self.dut.n.valid_o + i_n_ready = yield self.dut.n.ready_i_test + if not o_n_valid or not i_n_ready: + continue + data_o = yield self.dut.n.data_o + self.resultfn(data_o, self.data[self.o], self.i, self.o) + self.o += 1 + if self.o == len(self.data): + break + +def resultfn_3(data_o, expected, i, o): + assert data_o == expected + 1, \ + "%d-%d data %x not match %x\n" \ + % (i, o, data_o, expected) + +def data_placeholder(): + data = [] + for i in range(num_tests): + d = PlaceHolder() + d.src1 = randint(0, 1<<16-1) + d.src2 = randint(0, 1<<16-1) + data.append(d) + return data + +def data_dict(): + data = [] + for i in range(num_tests): + data.append({'src1': randint(0, 1<<16-1), + 'src2': randint(0, 1<<16-1)}) + return data + + +class Test5: + def __init__(self, dut, resultfn, data=None, stage_ctl=False): + self.dut = dut + self.resultfn = resultfn + self.stage_ctl = stage_ctl + if data: + self.data = data + else: + self.data = [] + for i in range(num_tests): + self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1))) + self.i = 0 + self.o = 0 + + 
def send(self): + while self.o != len(self.data): + send_range = randint(0, 3) + for j in range(randint(1,10)): + if send_range == 0: + send = True + else: + send = randint(0, send_range) != 0 + #send = True + o_p_ready = yield self.dut.p.ready_o + if not o_p_ready: + yield + continue + if send and self.i != len(self.data): + yield self.dut.p.valid_i.eq(1) + for v in self.dut.set_input(self.data[self.i]): + yield v + self.i += 1 + else: + yield self.dut.p.valid_i.eq(0) + yield + + def rcv(self): + while self.o != len(self.data): + stall_range = randint(0, 3) + for j in range(randint(1,10)): + ready = randint(0, stall_range) != 0 + #ready = True + yield self.dut.n.ready_i.eq(ready) + yield + o_n_valid = yield self.dut.n.valid_o + i_n_ready = yield self.dut.n.ready_i_test + if not o_n_valid or not i_n_ready: + continue + if isinstance(self.dut.n.data_o, Record): + data_o = {} + dod = self.dut.n.data_o + for k, v in dod.fields.items(): + data_o[k] = yield v + else: + data_o = yield self.dut.n.data_o + self.resultfn(data_o, self.data[self.o], self.i, self.o) + self.o += 1 + if self.o == len(self.data): + break + +def resultfn_5(data_o, expected, i, o): + res = expected[0] + expected[1] + assert data_o == res, \ + "%d-%d data %x not match %s\n" \ + % (i, o, data_o, repr(expected)) + +def tbench4(dut): + data = [] + for i in range(num_tests): + #data.append(randint(0, 1<<16-1)) + data.append(i+1) + i = 0 + o = 0 + while True: + stall = randint(0, 3) != 0 + send = randint(0, 5) != 0 + yield dut.n.ready_i.eq(stall) + o_p_ready = yield dut.p.ready_o + if o_p_ready: + if send and i != len(data): + yield dut.p.valid_i.eq(1) + yield dut.p.data_i.eq(data[i]) + i += 1 + else: + yield dut.p.valid_i.eq(0) + yield + o_n_valid = yield dut.n.valid_o + i_n_ready = yield dut.n.ready_i_test + if o_n_valid and i_n_ready: + data_o = yield dut.n.data_o + assert data_o == data[o] + 2, "%d-%d data %x not match %x\n" \ + % (i, o, data_o, data[o]) + o += 1 + if o == len(data): + break + 
+###################################################################### +# Test 2 and 4 +###################################################################### + +class ExampleBufPipe2(ControlBase): + """ Example of how to do chained pipeline stages. + """ + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + pipe1 = ExampleBufPipe() + pipe2 = ExampleBufPipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + + +###################################################################### +# Test 9 +###################################################################### + +class ExampleBufPipeChain2(BufferedHandshake): + """ connects two stages together as a *single* combinatorial stage. + """ + def __init__(self): + stage1 = ExampleStageCls() + stage2 = ExampleStageCls() + combined = StageChain([stage1, stage2]) + BufferedHandshake.__init__(self, combined) + + +def data_chain2(): + data = [] + for i in range(num_tests): + data.append(randint(0, 1<<16-2)) + return data + + +def resultfn_9(data_o, expected, i, o): + res = expected + 2 + assert data_o == res, \ + "%d-%d received data %x not match expected %x\n" \ + % (i, o, data_o, res) + + +###################################################################### +# Test 6 and 10 +###################################################################### + +class SetLessThan(Elaboratable): + def __init__(self, width, signed): + self.m = Module() + self.src1 = Signal((width, signed), name="src1") + self.src2 = Signal((width, signed), name="src2") + self.output = Signal(width, name="out") + + def elaborate(self, platform): + self.m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0)) + return self.m + + +class LTStage(StageCls): + """ module-based stage example + """ + def __init__(self): + self.slt = SetLessThan(16, True) + + def ispec(self, name): + return (Signal(16, name="%s_sig1" % name), + Signal(16, name="%s_sig2" % name)) 
+ + def ospec(self, name): + return Signal(16, "%s_out" % name) + + def setup(self, m, i): + self.o = Signal(16) + m.submodules.slt = self.slt + m.d.comb += self.slt.src1.eq(i[0]) + m.d.comb += self.slt.src2.eq(i[1]) + m.d.comb += self.o.eq(self.slt.output) + + def process(self, i): + return self.o + + +class LTStageDerived(SetLessThan, StageCls): + """ special version of a nmigen module where the module is also a stage + + shows that you don't actually need to combinatorially connect + to the outputs, or add the module as a submodule: just return + the module output parameter(s) from the Stage.process() function + """ + + def __init__(self): + SetLessThan.__init__(self, 16, True) + + def ispec(self): + return (Signal(16), Signal(16)) + + def ospec(self): + return Signal(16) + + def setup(self, m, i): + m.submodules.slt = self + m.d.comb += self.src1.eq(i[0]) + m.d.comb += self.src2.eq(i[1]) + + def process(self, i): + return self.output + + +class ExampleLTPipeline(UnbufferedPipeline): + """ an example of how to use the unbuffered pipeline. + """ + + def __init__(self): + stage = LTStage() + UnbufferedPipeline.__init__(self, stage) + + +class ExampleLTBufferedPipeDerived(BufferedHandshake): + """ an example of how to use the buffered pipeline. 
+ """ + + def __init__(self): + stage = LTStageDerived() + BufferedHandshake.__init__(self, stage) + + +def resultfn_6(data_o, expected, i, o): + res = 1 if expected[0] < expected[1] else 0 + assert data_o == res, \ + "%d-%d data %x not match %s\n" \ + % (i, o, data_o, repr(expected)) + + +###################################################################### +# Test 7 +###################################################################### + +class ExampleAddRecordStage(StageCls): + """ example use of a Record + """ + + record_spec = [('src1', 16), ('src2', 16)] + def ispec(self): + """ returns a Record using the specification + """ + return Record(self.record_spec) + + def ospec(self): + return Record(self.record_spec) + + def process(self, i): + """ process the input data, returning a dictionary with key names + that exactly match the Record's attributes. + """ + return {'src1': i.src1 + 1, + 'src2': i.src2 + 1} + +###################################################################### +# Test 11 +###################################################################### + +class ExampleAddRecordPlaceHolderStage(StageCls): + """ example use of a Record, with a placeholder as the processing result + """ + + record_spec = [('src1', 16), ('src2', 16)] + def ispec(self): + """ returns a Record using the specification + """ + return Record(self.record_spec) + + def ospec(self): + return Record(self.record_spec) + + def process(self, i): + """ process the input data, returning a PlaceHolder class instance + with attributes that exactly match those of the Record. + """ + o = PlaceHolder() + o.src1 = i.src1 + 1 + o.src2 = i.src2 + 1 + return o + + +# a dummy class that may have stuff assigned to instances once created +class PlaceHolder: pass + + +class ExampleAddRecordPipe(UnbufferedPipeline): + """ an example of how to use the combinatorial pipeline. 
+ """ + + def __init__(self): + stage = ExampleAddRecordStage() + UnbufferedPipeline.__init__(self, stage) + + +def resultfn_7(data_o, expected, i, o): + res = (expected['src1'] + 1, expected['src2'] + 1) + assert data_o['src1'] == res[0] and data_o['src2'] == res[1], \ + "%d-%d data %s not match %s\n" \ + % (i, o, repr(data_o), repr(expected)) + + +class ExampleAddRecordPlaceHolderPipe(UnbufferedPipeline): + """ an example of how to use the combinatorial pipeline. + """ + + def __init__(self): + stage = ExampleAddRecordPlaceHolderStage() + UnbufferedPipeline.__init__(self, stage) + + +def resultfn_test11(data_o, expected, i, o): + res1 = expected.src1 + 1 + res2 = expected.src2 + 1 + assert data_o['src1'] == res1 and data_o['src2'] == res2, \ + "%d-%d data %s not match %s\n" \ + % (i, o, repr(data_o), repr(expected)) + + +###################################################################### +# Test 8 +###################################################################### + + +class Example2OpClass: + """ an example of a class used to store 2 operands. + requires an eq function, to conform with the pipeline stage API + """ + + def __init__(self): + self.op1 = Signal(16) + self.op2 = Signal(16) + + def eq(self, i): + return [self.op1.eq(i.op1), self.op2.eq(i.op2)] + + +class ExampleAddClassStage(StageCls): + """ an example of how to use the buffered pipeline, as a class instance + """ + + def ispec(self): + """ returns an instance of an Example2OpClass. 
+ """ + return Example2OpClass() + + def ospec(self): + """ returns an output signal which will happen to contain the sum + of the two inputs + """ + return Signal(16, name="add2_out") + + def process(self, i): + """ process the input data (sums the values in the tuple) and returns it + """ + return i.op1 + i.op2 + + +class ExampleBufPipeAddClass(BufferedHandshake): + """ an example of how to use the buffered pipeline, using a class instance + """ + + def __init__(self): + addstage = ExampleAddClassStage() + BufferedHandshake.__init__(self, addstage) + + +class TestInputAdd: + """ the eq function, called by set_input, needs an incoming object + that conforms to the Example2OpClass.eq function requirements + easiest way to do that is to create a class that has the exact + same member layout (self.op1, self.op2) as Example2OpClass + """ + def __init__(self, op1, op2): + self.op1 = op1 + self.op2 = op2 + + +def resultfn_8(data_o, expected, i, o): + res = expected.op1 + expected.op2 # these are a TestInputAdd instance + assert data_o == res, \ + "%d-%d data %s res %x not match %s\n" \ + % (i, o, repr(data_o), res, repr(expected)) + +def data_2op(): + data = [] + for i in range(num_tests): + data.append(TestInputAdd(randint(0, 1<<16-1), randint(0, 1<<16-1))) + return data + + +###################################################################### +# Test 12 +###################################################################### + +class ExampleStageDelayCls(StageCls, Elaboratable): + """ an example of how to use the buffered pipeline, in a static class + fashion + """ + + def __init__(self, valid_trigger=2): + self.count = Signal(2) + self.valid_trigger = valid_trigger + + def ispec(self): + return Signal(16, name="example_input_signal") + + def ospec(self): + return Signal(16, name="example_output_signal") + + @property + def d_ready(self): + """ data is ready to be accepted when this is true + """ + return (self.count == 1)# | (self.count == 3) + return Const(1) + + 
def d_valid(self, ready_i): + """ data is valid at output when this is true + """ + return self.count == self.valid_trigger + return Const(1) + + def process(self, i): + """ process the input data and returns it (adds 1) + """ + return i + 1 + + def elaborate(self, platform): + m = Module() + m.d.sync += self.count.eq(self.count + 1) + return m + + +class ExampleBufDelayedPipe(BufferedHandshake): + + def __init__(self): + stage = ExampleStageDelayCls(valid_trigger=2) + BufferedHandshake.__init__(self, stage, stage_ctl=True) + + def elaborate(self, platform): + m = BufferedHandshake.elaborate(self, platform) + m.submodules.stage = self.stage + return m + + +def data_chain1(): + data = [] + for i in range(num_tests): + data.append(1<<((i*3)%15)) + #data.append(randint(0, 1<<16-2)) + #print (hex(data[-1])) + return data + + +def resultfn_12(data_o, expected, i, o): + res = expected + 1 + assert data_o == res, \ + "%d-%d data %x not match %x\n" \ + % (i, o, data_o, res) + + +###################################################################### +# Test 13 +###################################################################### + +class ExampleUnBufDelayedPipe(BufferedHandshake): + + def __init__(self): + stage = ExampleStageDelayCls(valid_trigger=3) + BufferedHandshake.__init__(self, stage, stage_ctl=True) + + def elaborate(self, platform): + m = BufferedHandshake.elaborate(self, platform) + m.submodules.stage = self.stage + return m + +###################################################################### +# Test 15 +###################################################################### + +class ExampleBufModeAdd1Pipe(SimpleHandshake): + + def __init__(self): + stage = ExampleStageCls() + SimpleHandshake.__init__(self, stage) + + +###################################################################### +# Test 16 +###################################################################### + +class ExampleBufModeUnBufPipe(ControlBase): + + def elaborate(self, platform): + m = 
ControlBase.elaborate(self, platform) + + pipe1 = ExampleBufModeAdd1Pipe() + pipe2 = ExampleBufAdd1Pipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + +###################################################################### +# Test 17 +###################################################################### + +class ExampleUnBufAdd1Pipe2(UnbufferedPipeline2): + + def __init__(self): + stage = ExampleStageCls() + UnbufferedPipeline2.__init__(self, stage) + + +###################################################################### +# Test 18 +###################################################################### + +class PassThroughTest(PassThroughHandshake): + + def iospecfn(self): + return Signal(16, "out") + + def __init__(self): + stage = PassThroughStage(self.iospecfn) + PassThroughHandshake.__init__(self, stage) + +def resultfn_identical(data_o, expected, i, o): + res = expected + assert data_o == res, \ + "%d-%d data %x not match %x\n" \ + % (i, o, data_o, res) + + +###################################################################### +# Test 19 +###################################################################### + +class ExamplePassAdd1Pipe(PassThroughHandshake): + + def __init__(self): + stage = ExampleStageCls() + PassThroughHandshake.__init__(self, stage) + + +class ExampleBufPassThruPipe(ControlBase): + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + # XXX currently fails: any other permutation works fine. 
+ # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok + # also fails using UnbufferedPipeline as well + pipe1 = ExampleBufModeAdd1Pipe() + pipe2 = ExamplePassAdd1Pipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + + +###################################################################### +# Test 20 +###################################################################### + +def iospecfn(): + return Signal(16, name="d_in") + +class FIFOTest16(FIFOControl): + + def __init__(self): + stage = PassThroughStage(iospecfn) + FIFOControl.__init__(self, 2, stage) + + +###################################################################### +# Test 21 +###################################################################### + +class ExampleFIFOPassThruPipe1(ControlBase): + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + pipe1 = FIFOTest16() + pipe2 = FIFOTest16() + pipe3 = ExamplePassAdd1Pipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + m.submodules.pipe3 = pipe3 + + m.d.comb += self.connect([pipe1, pipe2, pipe3]) + + return m + + +###################################################################### +# Test 22 +###################################################################### + +class Example2OpRecord(RecordObject): + def __init__(self): + RecordObject.__init__(self) + self.op1 = Signal(16) + self.op2 = Signal(16) + + +class ExampleAddRecordObjectStage(StageCls): + + def ispec(self): + """ returns an instance of an Example2OpRecord. 
+ """ + return Example2OpRecord() + + def ospec(self): + """ returns an output signal which will happen to contain the sum + of the two inputs + """ + return Signal(16) + + def process(self, i): + """ process the input data (sums the values in the tuple) and returns it + """ + return i.op1 + i.op2 + + +class ExampleRecordHandshakeAddClass(SimpleHandshake): + + def __init__(self): + addstage = ExampleAddRecordObjectStage() + SimpleHandshake.__init__(self, stage=addstage) + + +###################################################################### +# Test 23 +###################################################################### + +def iospecfnrecord(): + return Example2OpRecord() + +class FIFOTestRecordControl(FIFOControl): + + def __init__(self): + stage = PassThroughStage(iospecfnrecord) + FIFOControl.__init__(self, 2, stage) + + +class ExampleFIFORecordObjectPipe(ControlBase): + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + pipe1 = FIFOTestRecordControl() + pipe2 = ExampleRecordHandshakeAddClass() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + + +###################################################################### +# Test 24 +###################################################################### + +class FIFOTestRecordAddStageControl(FIFOControl): + + def __init__(self): + stage = ExampleAddRecordObjectStage() + FIFOControl.__init__(self, 2, stage) + + + +###################################################################### +# Test 25 +###################################################################### + +class FIFOTestAdd16(FIFOControl): + + def __init__(self): + stage = ExampleStageCls() + FIFOControl.__init__(self, 2, stage) + + +class ExampleFIFOAdd2Pipe(ControlBase): + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + pipe1 = FIFOTestAdd16() + pipe2 = FIFOTestAdd16() + + m.submodules.pipe1 = pipe1 + 
m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + + +###################################################################### +# Test 26 +###################################################################### + +def iospecfn24(): + return (Signal(16, name="src1"), Signal(16, name="src2")) + +class FIFOTest2x16(FIFOControl): + + def __init__(self): + stage = PassThroughStage(iospecfn2) + FIFOControl.__init__(self, 2, stage) + + +###################################################################### +# Test 997 +###################################################################### + +class ExampleBufPassThruPipe2(ControlBase): + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + # XXX currently fails: any other permutation works fine. + # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok + # also fails using UnbufferedPipeline as well + #pipe1 = ExampleUnBufAdd1Pipe() + #pipe2 = ExampleBufAdd1Pipe() + pipe1 = ExampleBufAdd1Pipe() + pipe2 = ExamplePassAdd1Pipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + + +###################################################################### +# Test 998 +###################################################################### + +class ExampleBufPipe3(ControlBase): + """ Example of how to do delayed pipeline, where the stage signals + whether it is ready. 
+ """ + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + pipe1 = ExampleBufDelayedPipe() + pipe2 = ExampleBufPipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + +###################################################################### +# Test 999 - XXX FAILS +# http://bugs.libre-riscv.org/show_bug.cgi?id=57 +###################################################################### + +class ExampleBufAdd1Pipe(BufferedHandshake): + + def __init__(self): + stage = ExampleStageCls() + BufferedHandshake.__init__(self, stage) + + +class ExampleUnBufAdd1Pipe(UnbufferedPipeline): + + def __init__(self): + stage = ExampleStageCls() + UnbufferedPipeline.__init__(self, stage) + + +class ExampleBufUnBufPipe(ControlBase): + + def elaborate(self, platform): + m = ControlBase.elaborate(self, platform) + + # XXX currently fails: any other permutation works fine. + # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok + # also fails using UnbufferedPipeline as well + #pipe1 = ExampleUnBufAdd1Pipe() + #pipe2 = ExampleBufAdd1Pipe() + pipe1 = ExampleBufAdd1Pipe() + pipe2 = ExampleUnBufAdd1Pipe() + + m.submodules.pipe1 = pipe1 + m.submodules.pipe2 = pipe2 + + m.d.comb += self.connect([pipe1, pipe2]) + + return m + + +###################################################################### +# Unit Tests +###################################################################### + +num_tests = 10 + +if __name__ == '__main__': + if False: + print ("test 1") + dut = ExampleBufPipe() + run_simulation(dut, tbench(dut), vcd_name="test_bufpipe.vcd") + + print ("test 2") + dut = ExampleBufPipe2() + run_simulation(dut, tbench2(dut), vcd_name="test_bufpipe2.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufpipe2.il", "w") as f: + f.write(vl) + + + print ("test 3") + dut = ExampleBufPipe() + 
test = Test3(dut, resultfn_3) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd") + + print ("test 3.5") + dut = ExamplePipeline() + test = Test3(dut, resultfn_3) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd") + + print ("test 4") + dut = ExampleBufPipe2() + run_simulation(dut, tbench4(dut), vcd_name="test_bufpipe4.vcd") + + print ("test 5") + dut = ExampleBufPipeAdd() + test = Test5(dut, resultfn_5, stage_ctl=True) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd") + + print ("test 6") + dut = ExampleLTPipeline() + test = Test5(dut, resultfn_6) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd") + + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + list(dut.p.data_i) + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_ltcomb_pipe.il", "w") as f: + f.write(vl) + + print ("test 7") + dut = ExampleAddRecordPipe() + data=data_dict() + test = Test5(dut, resultfn_7, data=data) + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o, + dut.p.data_i.src1, dut.p.data_i.src2, + dut.n.data_o.src1, dut.n.data_o.src2] + vl = rtlil.convert(dut, ports=ports) + with open("test_recordcomb_pipe.il", "w") as f: + f.write(vl) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd") + + print ("test 8") + dut = ExampleBufPipeAddClass() + data=data_2op() + test = Test5(dut, resultfn_8, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe8.vcd") + + print ("test 9") + dut = ExampleBufPipeChain2() + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufpipechain2.il", "w") as f: + f.write(vl) + + data = data_chain2() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, [test.send, test.rcv], + vcd_name="test_bufpipechain2.vcd") + + print ("test 10") + dut = 
ExampleLTBufferedPipeDerived() + test = Test5(dut, resultfn_6) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltbufpipe10.vcd") + vl = rtlil.convert(dut, ports=ports) + with open("test_ltbufpipe10.il", "w") as f: + f.write(vl) + + print ("test 11") + dut = ExampleAddRecordPlaceHolderPipe() + data=data_placeholder() + test = Test5(dut, resultfn_test11, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd") + + + print ("test 12") + dut = ExampleBufDelayedPipe() + data = data_chain1() + test = Test5(dut, resultfn_12, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe12.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufpipe12.il", "w") as f: + f.write(vl) + + print ("test 13") + dut = ExampleUnBufDelayedPipe() + data = data_chain1() + test = Test5(dut, resultfn_12, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe13.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_unbufpipe13.il", "w") as f: + f.write(vl) + + print ("test 15") + dut = ExampleBufModeAdd1Pipe() + data = data_chain1() + test = Test5(dut, resultfn_12, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf15.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufunbuf15.il", "w") as f: + f.write(vl) + + print ("test 16") + dut = ExampleBufModeUnBufPipe() + data = data_chain1() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf16.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = 
rtlil.convert(dut, ports=ports) + with open("test_bufunbuf16.il", "w") as f: + f.write(vl) + + print ("test 17") + dut = ExampleUnBufAdd1Pipe2() + data = data_chain1() + test = Test5(dut, resultfn_12, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe17.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_unbufpipe17.il", "w") as f: + f.write(vl) + + print ("test 18") + dut = PassThroughTest() + data = data_chain1() + test = Test5(dut, resultfn_identical, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_passthru18.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_passthru18.il", "w") as f: + f.write(vl) + + print ("test 19") + dut = ExampleBufPassThruPipe() + data = data_chain1() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpass19.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufpass19.il", "w") as f: + f.write(vl) + + print ("test 20") + dut = FIFOTest16() + data = data_chain1() + test = Test5(dut, resultfn_identical, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifo20.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_fifo20.il", "w") as f: + f.write(vl) + + print ("test 21") + dut = ExampleFIFOPassThruPipe1() + data = data_chain1() + test = Test5(dut, resultfn_12, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifopass21.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + 
[dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_fifopass21.il", "w") as f: + f.write(vl) + + print ("test 22") + dut = ExampleRecordHandshakeAddClass() + data=data_2op() + test = Test5(dut, resultfn_8, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord22.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i.op1, dut.p.data_i.op2] + \ + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_addrecord22.il", "w") as f: + f.write(vl) + + print ("test 23") + dut = ExampleFIFORecordObjectPipe() + data=data_2op() + test = Test5(dut, resultfn_8, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord23.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i.op1, dut.p.data_i.op2] + \ + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_addrecord23.il", "w") as f: + f.write(vl) + + print ("test 24") + dut = FIFOTestRecordAddStageControl() + data=data_2op() + test = Test5(dut, resultfn_8, data=data) + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i.op1, dut.p.data_i.op2] + \ + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_addrecord24.il", "w") as f: + f.write(vl) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord24.vcd") + + print ("test 25") + dut = ExampleFIFOAdd2Pipe() + data = data_chain1() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_add2pipe25.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_add2pipe25.il", "w") as f: + f.write(vl) + + print ("test 997") + dut = ExampleBufPassThruPipe2() + data = data_chain1() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, 
[test.send, test.rcv], vcd_name="test_bufpass997.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufpass997.il", "w") as f: + f.write(vl) + + print ("test 998 (fails, bug)") + dut = ExampleBufPipe3() + data = data_chain1() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe14.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufpipe14.il", "w") as f: + f.write(vl) + + print ("test 999 (expected to fail, which is a bug)") + dut = ExampleBufUnBufPipe() + data = data_chain1() + test = Test5(dut, resultfn_9, data=data) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf999.vcd") + ports = [dut.p.valid_i, dut.n.ready_i, + dut.n.valid_o, dut.p.ready_o] + \ + [dut.p.data_i] + [dut.n.data_o] + vl = rtlil.convert(dut, ports=ports) + with open("test_bufunbuf999.il", "w") as f: + f.write(vl) + diff --git a/src/ieee754/add/test_div.py b/src/ieee754/add/test_div.py new file mode 100644 index 00000000..3f192338 --- /dev/null +++ b/src/ieee754/add/test_div.py @@ -0,0 +1,47 @@ +import sys +from random import randint +from random import seed +from operator import truediv + +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation + +from nmigen_div_experiment import FPDIV + +from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan, + is_inf, is_pos_inf, is_neg_inf, + match, get_case, check_case, run_test, + run_edge_cases, run_corner_cases) + + +def testbench(dut): + yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000) + yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000) + yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000) + yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000) + 
yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000) + yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB) + yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C) + yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2) + yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8) + yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC) + yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5) + yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2) + + count = 0 + + #regression tests + stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017] + stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab] + yield from run_test(dut, stimulus_a, stimulus_b, truediv, get_case) + count += len(stimulus_a) + print (count, "vectors passed") + + yield from run_corner_cases(dut, count, truediv, get_case) + yield from run_edge_cases(dut, count, truediv, get_case) + + +if __name__ == '__main__': + dut = FPDIV(width=32) + run_simulation(dut, testbench(dut), vcd_name="test_div.vcd") + diff --git a/src/ieee754/add/test_div64.py b/src/ieee754/add/test_div64.py new file mode 100644 index 00000000..5a9daf23 --- /dev/null +++ b/src/ieee754/add/test_div64.py @@ -0,0 +1,67 @@ +from nmigen import Module, Signal +from nmigen.compat.sim import run_simulation + +from nmigen_div_experiment import FPDIV + +class ORGate: + def __init__(self): + self.a = Signal() + self.b = Signal() + self.x = Signal() + + def elaborate(self, platform=None): + + m = Module() + m.d.comb += self.x.eq(self.a | self.b) + + return m + +def check_case(dut, a, b, z): + yield dut.in_a.v.eq(a) + yield dut.in_a.stb.eq(1) + yield + yield + a_ack = (yield dut.in_a.ack) + assert a_ack == 0 + yield dut.in_b.v.eq(b) + yield dut.in_b.stb.eq(1) + b_ack = (yield dut.in_b.ack) + assert b_ack == 0 + + while True: + yield + out_z_stb = (yield dut.out_z.stb) + if not out_z_stb: + continue + yield dut.in_a.stb.eq(0) + yield dut.in_b.stb.eq(0) + 
yield dut.out_z.ack.eq(1) + yield + yield dut.out_z.ack.eq(0) + yield + yield + break + + out_z = yield dut.out_z.v + assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) + +def testbench(dut): + yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000, + 0x4008000000000000) + yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000, + 0x3FD5555555555555) + + if False: + yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000) + yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB) + yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C) + yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2) + yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8) + yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC) + yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5) + yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2) + +if __name__ == '__main__': + dut = FPDIV(width=64) + run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd") + diff --git a/src/ieee754/add/test_dual.py b/src/ieee754/add/test_dual.py new file mode 100644 index 00000000..15f5c762 --- /dev/null +++ b/src/ieee754/add/test_dual.py @@ -0,0 +1,60 @@ +from sfpy import Float32 +from nmigen.compat.sim import run_simulation +from dual_add_experiment import ALU + + +def get_case(dut, a, b, c): + yield dut.a.v.eq(a) + yield dut.a.stb.eq(1) + yield + yield + a_ack = (yield dut.a.ack) + assert a_ack == 0 + + yield dut.a.stb.eq(0) + + yield dut.b.v.eq(b) + yield dut.b.stb.eq(1) + yield + yield + b_ack = (yield dut.b.ack) + assert b_ack == 0 + + yield dut.b.stb.eq(0) + + yield dut.c.v.eq(c) + yield dut.c.stb.eq(1) + yield + yield + c_ack = (yield dut.c.ack) + assert c_ack == 0 + + yield dut.c.stb.eq(0) + + yield dut.z.ack.eq(1) + + while True: + out_z_stb = (yield dut.z.stb) + if not out_z_stb: + yield + continue + + out_z = yield dut.z.v + + yield dut.z.ack.eq(0) + break + + return out_z + +def 
check_case(dut, a, b, c, z): + out_z = yield from get_case(dut, a, b, c) + assert out_z == z, "Output z 0x%x != 0x%x" % (out_z, z) + +def testbench(dut): + yield from check_case(dut, 0, 0, 0, 0) + yield from check_case(dut, 0x3F800000, 0x40000000, 0xc0000000, 0x3F800000) + +if __name__ == '__main__': + dut = ALU(width=32) + run_simulation(dut, testbench(dut), vcd_name="test_dual_add.vcd") + diff --git a/src/ieee754/add/test_fpadd_pipe.py b/src/ieee754/add/test_fpadd_pipe.py new file mode 100644 index 00000000..df25e55f --- /dev/null +++ b/src/ieee754/add/test_fpadd_pipe.py @@ -0,0 +1,126 @@ +""" key strategic example showing how to do multi-input fan-in into a + multi-stage pipeline, then multi-output fanout. + + the multiplex ID from the fan-in is passed in to the pipeline, preserved, + and used as a routing ID on the fanout. +""" + +from random import randint +from math import log +from nmigen import Module, Signal, Cat, Value +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil + +from nmigen_add_experiment import (FPADDMuxInOut,) + +from sfpy import Float32 + +class InputTest: + def __init__(self, dut): + self.dut = dut + self.di = {} + self.do = {} + self.tlen = 10 + self.width = 32 + for mid in range(dut.num_rows): + self.di[mid] = {} + self.do[mid] = [] + for i in range(self.tlen): + op1 = randint(0, (1<> (i+1)) << 1) | (m & 1) + for l in range(i): + if m & (1<<(l+1)): + calc_m |= 1 + + assert out_e == calc_e, "Output e 0x%x != expected 0x%x" % (out_e, calc_e) + assert out_m == calc_m, "Output m 0x%x != expected 0x%x" % (out_m, calc_m) + +def testbench(dut): + m_width = dut.a.m_width + e_width = dut.a.e_width + e_max = dut.a.e_max + for j in range(200): + m = randint(0, (1<> b) & ((1<> b) & ((1<> 52) - 1023 + +def get_sign(x): + return ((x & 0x8000000000000000) >> 63) + +def is_nan(x): + return get_exponent(x) == 1024 and get_mantissa(x) != 0 + +def is_inf(x): + return get_exponent(x) == 1024 and get_mantissa(x) == 0 + +def 
is_pos_inf(x): + return is_inf(x) and not get_sign(x) + +def is_neg_inf(x): + return is_inf(x) and get_sign(x) + +def match(x, y): + return ( + (is_pos_inf(x) and is_pos_inf(y)) or + (is_neg_inf(x) and is_neg_inf(y)) or + (is_nan(x) and is_nan(y)) or + (x == y) + ) + +def get_case(dut, a, b): + yield dut.in_a.v.eq(a) + yield dut.in_a.stb.eq(1) + yield + yield + a_ack = (yield dut.in_a.ack) + assert a_ack == 0 + yield dut.in_b.v.eq(b) + yield dut.in_b.stb.eq(1) + b_ack = (yield dut.in_b.ack) + assert b_ack == 0 + + while True: + yield + out_z_stb = (yield dut.out_z.stb) + if not out_z_stb: + continue + yield dut.in_a.stb.eq(0) + yield dut.in_b.stb.eq(0) + yield dut.out_z.ack.eq(1) + yield + yield dut.out_z.ack.eq(0) + yield + yield + break + + out_z = yield dut.out_z.v + return out_z + +def check_case(dut, a, b, z): + out_z = yield from get_case(dut, a, b) + assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) + + +def run_test(dut, stimulus_a, stimulus_b, op): + + expected_responses = [] + actual_responses = [] + for a, b in zip(stimulus_a, stimulus_b): + af = Float64.from_bits(a) + bf = Float64.from_bits(b) + z = op(af, bf) + expected_responses.append(z.get_bits()) + #print (af, bf, z) + actual = yield from get_case(dut, a, b) + actual_responses.append(actual) + + if len(actual_responses) < len(expected_responses): + print ("Fail ... not enough results") + exit(0) + + for exp, act, a, b in zip(expected_responses, actual_responses, + stimulus_a, stimulus_b): + passed = match(exp, act) + + if not passed: + + print ("Fail ... 
expected:", hex(exp), "actual:", hex(act)) + + print (hex(a)) + print ("a mantissa:", a & 0x000fffffffffffff) + print ("a exponent:", ((a & 0x7ff0000000000000) >> 52)\ + - 1023) + print ("a sign:", ((a & 0x8000000000000000) >> 63)) + + print (hex(b)) + print ("b mantissa:", b & 0x000fffffffffffff) + print ("b exponent:", ((b & 0x7ff0000000000000) >> 52)\ + - 1023) + print ("b sign:", ((b & 0x8000000000000000) >> 63)) + + print (hex(exp)) + print ("expected mantissa:", exp & 0x000fffffffffffff) + print ("expected exponent:", ((exp & 0x7ff0000000000000) >> 52)\ + - 1023) + print ("expected sign:", ((exp & 0x8000000000000000) >> 63)) + + print (hex(act)) + print ("actual mantissa:", act & 0x000fffffffffffff) + print ("actual exponent:", ((act & 0x7ff0000000000000) >> 52)\ + - 1023) + print ("actual sign:", ((act & 0x8000000000000000) >> 63)) + + sys.exit(0) + + +def run_corner_cases(dut, count, op): + #corner cases + from itertools import permutations + stimulus_a = [i[0] for i in permutations([ + 0x8000000000000000, + 0x0000000000000000, + 0x7ff8000000000000, + 0xfff8000000000000, + 0x7ff0000000000000, + 0xfff0000000000000 + ], 2)] + stimulus_b = [i[1] for i in permutations([ + 0x8000000000000000, + 0x0000000000000000, + 0x7ff8000000000000, + 0xfff8000000000000, + 0x7ff0000000000000, + 0xfff0000000000000 + ], 2)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + +def run_edge_cases(dut, count, op): + #edge cases + stimulus_a = [0x8000000000000000 for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0x0000000000000000 for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = 
[0x8000000000000000 for i in range(1000)] + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x0000000000000000 for i in range(1000)] + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0x7FF8000000000000 for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0xFFF8000000000000 for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x7FF8000000000000 for i in range(1000)] + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0xFFF8000000000000 for i in range(1000)] + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0x7FF0000000000000 for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0xFFF0000000000000 for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x7FF0000000000000 for i in range(1000)] + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, 
op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0xFFF0000000000000 for i in range(1000)] + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + #seed(0) + for i in range(100000): + stimulus_a = [randint(0, 1<<64) for i in range(1000)] + stimulus_b = [randint(0, 1<<64) for i in range(1000)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += 1000 + print (count, "random vectors passed") + diff --git a/src/ieee754/add/unit_test_half.py b/src/ieee754/add/unit_test_half.py new file mode 100644 index 00000000..73c9b653 --- /dev/null +++ b/src/ieee754/add/unit_test_half.py @@ -0,0 +1,211 @@ +from random import randint +from random import seed + +import sys +from sfpy import Float16 + +def get_mantissa(x): + return 0x3ff & x + +def get_exponent(x): + return ((x & 0xf800) >> 11) - 15 + +def get_sign(x): + return ((x & 0x8000) >> 15) + +def is_nan(x): + return get_exponent(x) == 16 and get_mantissa(x) != 0 + +def is_inf(x): + return get_exponent(x) == 16 and get_mantissa(x) == 0 + +def is_pos_inf(x): + return is_inf(x) and not get_sign(x) + +def is_neg_inf(x): + return is_inf(x) and get_sign(x) + +def match(x, y): + return ( + (is_pos_inf(x) and is_pos_inf(y)) or + (is_neg_inf(x) and is_neg_inf(y)) or + (is_nan(x) and is_nan(y)) or + (x == y) + ) + +def get_case(dut, a, b): + yield dut.in_a.v.eq(a) + yield dut.in_a.stb.eq(1) + yield + yield + a_ack = (yield dut.in_a.ack) + assert a_ack == 0 + yield dut.in_b.v.eq(b) + yield dut.in_b.stb.eq(1) + b_ack = (yield dut.in_b.ack) + assert b_ack == 0 + + while True: + yield + out_z_stb = (yield dut.out_z.stb) + if not out_z_stb: + continue + yield dut.in_a.stb.eq(0) + yield dut.in_b.stb.eq(0) + yield dut.out_z.ack.eq(1) + yield + yield dut.out_z.ack.eq(0) + yield + yield + break + + out_z = yield dut.out_z.v + return out_z + +def check_case(dut, a, b, z): + out_z = 
yield from get_case(dut, a, b) + assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) + + +def run_test(dut, stimulus_a, stimulus_b, op): + + expected_responses = [] + actual_responses = [] + for a, b in zip(stimulus_a, stimulus_b): + af = Float16.from_bits(a) + bf = Float16.from_bits(b) + z = op(af, bf) + expected_responses.append(z.get_bits()) + #print (af, bf, z) + actual = yield from get_case(dut, a, b) + actual_responses.append(actual) + + if len(actual_responses) < len(expected_responses): + print ("Fail ... not enough results") + exit(0) + + for expected, actual, a, b in zip(expected_responses, actual_responses, + stimulus_a, stimulus_b): + passed = match(expected, actual) + + if not passed: + + print ("Fail ... expected:", hex(expected), "actual:", hex(actual)) + + print (hex(a)) + print ("a mantissa:", get_mantissa(a)) + print ("a exponent:", get_exponent(a)) + print ("a sign:", get_sign(a)) + + print (hex(b)) + print ("b mantissa:", get_mantissa(b)) + print ("b exponent:", get_exponent(b)) + print ("b sign:", get_sign(b)) + + print (hex(expected)) + print ("expected mantissa:", get_mantissa(expected)) + print ("expected exponent:", get_exponent(expected)) + print ("expected sign:", get_sign(expected)) + + print (hex(actual)) + print ("actual mantissa:", get_mantissa(actual)) + print ("actual exponent:", get_exponent(actual)) + print ("actual sign:", get_sign(actual)) + + sys.exit(0) + +def run_corner_cases(dut, count, op): + #corner cases + corners = [0x8000, 0x0000, 0x7800, 0xf800, 0x7c00, 0xfc00] + from itertools import permutations + stimulus_a = [i[0] for i in permutations(corners, 2)] + stimulus_b = [i[1] for i in permutations(corners, 2)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + +def run_edge_cases(dut, count, op): + maxint16 = 1<<16 + maxcount = 10 + #edge cases + stimulus_a = [0x8000 for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) 
for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0x0000 for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x8000 for i in range(maxcount)] + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x0000 for i in range(maxcount)] + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0x7800 for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0xF800 for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x7800 for i in range(maxcount)] + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0xF800 for i in range(maxcount)] + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_a = [0x7C00 for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + 
+ stimulus_a = [0xFC00 for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0x7C00 for i in range(maxcount)] + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + stimulus_b = [0xFC00 for i in range(maxcount)] + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += len(stimulus_a) + print (count, "vectors passed") + + #seed(0) + for i in range(100000): + stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)] + stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)] + yield from run_test(dut, stimulus_a, stimulus_b, op) + count += maxcount + print (count, "random vectors passed") + diff --git a/src/ieee754/add/unit_test_single.py b/src/ieee754/add/unit_test_single.py new file mode 100644 index 00000000..2b0d9e56 --- /dev/null +++ b/src/ieee754/add/unit_test_single.py @@ -0,0 +1,255 @@ +from random import randint +from random import seed + +import sys +from sfpy import Float32 + +def get_mantissa(x): + return 0x7fffff & x + +def get_exponent(x): + return ((x & 0x7f800000) >> 23) - 127 + +def set_exponent(x, e): + return (x & ~0x7f800000) | ((e+127) << 23) + +def get_sign(x): + return ((x & 0x80000000) >> 31) + +def is_nan(x): + return get_exponent(x) == 128 and get_mantissa(x) != 0 + +def is_inf(x): + return get_exponent(x) == 128 and get_mantissa(x) == 0 + +def is_pos_inf(x): + return is_inf(x) and not get_sign(x) + +def is_neg_inf(x): + return is_inf(x) and get_sign(x) + +def match(x, y): + return ( + (is_pos_inf(x) and is_pos_inf(y)) or + (is_neg_inf(x) and is_neg_inf(y)) or + (is_nan(x) and is_nan(y)) or + (x == y) + ) + +def get_rs_case(dut, a, b, mid): + in_a, in_b = 
dut.rs[0] + out_z = dut.res[0] + yield dut.ids.in_mid.eq(mid) + yield in_a.v.eq(a) + yield in_a.valid_i.eq(1) + yield + yield + yield + yield + a_ack = (yield in_a.ready_o) + assert a_ack == 0 + + yield in_a.valid_i.eq(0) + + yield in_b.v.eq(b) + yield in_b.valid_i.eq(1) + yield + yield + b_ack = (yield in_b.ready_o) + assert b_ack == 0 + + yield in_b.valid_i.eq(0) + + yield out_z.ready_i.eq(1) + + while True: + out_z_stb = (yield out_z.valid_o) + if not out_z_stb: + yield + continue + vout_z = yield out_z.v + #out_mid = yield dut.ids.out_mid + yield out_z.ready_i.eq(0) + yield + break + + return vout_z, mid + +def check_rs_case(dut, a, b, z, mid=None): + if mid is None: + mid = randint(0, 6) + mid = 0 + out_z, out_mid = yield from get_rs_case(dut, a, b, mid) + assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) + assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid) + + +def get_case(dut, a, b, mid): + #yield dut.in_mid.eq(mid) + yield dut.in_a.v.eq(a) + yield dut.in_a.valid_i_test.eq(1) + yield + yield + yield + yield + a_ack = (yield dut.in_a.ready_o) + assert a_ack == 0 + + yield dut.in_a.valid_i.eq(0) + + yield dut.in_b.v.eq(b) + yield dut.in_b.valid_i.eq(1) + yield + yield + b_ack = (yield dut.in_b.ready_o) + assert b_ack == 0 + + yield dut.in_b.valid_i.eq(0) + + yield dut.out_z.ready_i.eq(1) + + while True: + out_z_stb = (yield dut.out_z.valid_o) + if not out_z_stb: + yield + continue + out_z = yield dut.out_z.v + #out_mid = yield dut.out_mid + yield dut.out_z.ready_i.eq(0) + yield + break + + return out_z, mid # TODO: mid + +def check_case(dut, a, b, z, mid=None): + if mid is None: + mid = randint(0, 6) + mid = 0 + out_z, out_mid = yield from get_case(dut, a, b, mid) + assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z) + assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid) + + +def run_test(dut, stimulus_a, stimulus_b, op, get_case_fn): + + expected_responses = [] + 
actual_responses = [] + for a, b in zip(stimulus_a, stimulus_b): + mid = randint(0, 6) + mid = 0 + af = Float32.from_bits(a) + bf = Float32.from_bits(b) + z = op(af, bf) + expected_responses.append((z.get_bits(), mid)) + actual = yield from get_case_fn(dut, a, b, mid) + actual_responses.append(actual) + + if len(actual_responses) < len(expected_responses): + print ("Fail ... not enough results") + exit(0) + + for expected, actual, a, b in zip(expected_responses, actual_responses, + stimulus_a, stimulus_b): + passed = match(expected[0], actual[0]) + if expected[1] != actual[1]: # check mid + print ("MID failed", expected[1], actual[1]) + sys.exit(0) + + if not passed: + + expected = expected[0] + actual = actual[0] + print ("Fail ... expected:", hex(expected), "actual:", hex(actual)) + + print (hex(a)) + print ("a mantissa:", a & 0x7fffff) + print ("a exponent:", ((a & 0x7f800000) >> 23) - 127) + print ("a sign:", ((a & 0x80000000) >> 31)) + + print (hex(b)) + print ("b mantissa:", b & 0x7fffff) + print ("b exponent:", ((b & 0x7f800000) >> 23) - 127) + print ("b sign:", ((b & 0x80000000) >> 31)) + + print (hex(expected)) + print ("expected mantissa:", expected & 0x7fffff) + print ("expected exponent:", ((expected & 0x7f800000) >> 23) - 127) + print ("expected sign:", ((expected & 0x80000000) >> 31)) + + print (hex(actual)) + print ("actual mantissa:", actual & 0x7fffff) + print ("actual exponent:", ((actual & 0x7f800000) >> 23) - 127) + print ("actual sign:", ((actual & 0x80000000) >> 31)) + + sys.exit(0) + +corner_cases = [0x80000000, 0x00000000, 0x7f800000, 0xff800000, + 0x7fc00000, 0xffc00000] + +def run_corner_cases(dut, count, op, get_case_fn): + #corner cases + from itertools import permutations + stimulus_a = [i[0] for i in permutations(corner_cases, 2)] + stimulus_b = [i[1] for i in permutations(corner_cases, 2)] + yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn) + count += len(stimulus_a) + print (count, "vectors passed") + +def 
run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn): + yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn) + yield from run_test(dut, stimulus_b, stimulus_a, op, get_case_fn) + +def run_cases(dut, count, op, fixed_num, num_entries, get_case_fn): + if isinstance(fixed_num, int): + stimulus_a = [fixed_num for i in range(num_entries)] + report = hex(fixed_num) + else: + stimulus_a = fixed_num + report = "random" + + stimulus_b = [randint(0, 1<<32) for i in range(num_entries)] + yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) + count += len(stimulus_a) + print (count, "vectors passed 2^32", report) + + # non-canonical NaNs. + stimulus_b = [set_exponent(randint(0, 1<<32), 128) \ + for i in range(num_entries)] + yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) + count += len(stimulus_a) + print (count, "vectors passed Non-Canonical NaN", report) + + # -127 + stimulus_b = [set_exponent(randint(0, 1<<32), -127) \ + for i in range(num_entries)] + yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) + count += len(stimulus_a) + print (count, "vectors passed exp=-127", report) + + # nearly zero + stimulus_b = [set_exponent(randint(0, 1<<32), -126) \ + for i in range(num_entries)] + yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) + count += len(stimulus_a) + print (count, "vectors passed exp=-126", report) + + # nearly inf + stimulus_b = [set_exponent(randint(0, 1<<32), 127) \ + for i in range(num_entries)] + yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn) + count += len(stimulus_a) + print (count, "vectors passed exp=127", report) + + return count + +def run_edge_cases(dut, count, op, get_case_fn): + #edge cases + for testme in corner_cases: + count = yield from run_cases(dut, count, op, testme, 10, get_case_fn) + + for i in range(100000): + stimulus_a = [randint(0, 1<<32) for i in range(10)] + count = yield from run_cases(dut, count, op, stimulus_a, 10, + 
get_case_fn) + return count +