+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from math import log
-from nmigen import Module
-from nmigen.cli import main, verilog
-
-from singlepipe import PassThroughStage
-from multipipe import CombMuxOutPipe
-from multipipe import PriorityCombMuxInPipe
-
-from fpcommon.getop import FPADDBaseData
-from fpcommon.denorm import FPSCData
-from fpcommon.pack import FPPackData
-from fpcommon.normtopack import FPNormToPack
-from fpadd.specialcases import FPAddSpecialCasesDeNorm
-from fpadd.addstages import FPAddAlignSingleAdd
-
-
-def num_bits(n):
- return int(log(n) / log(2))
-
-class FPADDInMuxPipe(PriorityCombMuxInPipe):
- def __init__(self, num_rows, iospecfn):
- self.num_rows = num_rows
- stage = PassThroughStage(iospecfn)
- PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
-
-
-class FPADDMuxOutPipe(CombMuxOutPipe):
- def __init__(self, num_rows, iospecfn):
- self.num_rows = num_rows
- stage = PassThroughStage(iospecfn)
- CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
-
-
-class ReservationStations:
- """ Reservation-Station pipeline
-
- Input: num_rows - number of input and output Reservation Stations
-
- Requires: the addition of an "alu" object, an i_specfn and an o_specfn
-
- * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
- * ALU pipeline
- * fan-out on outputs (an array of FPPackData: z,mid)
-
- Fan-in and Fan-out are combinatorial.
- """
- def __init__(self, num_rows):
- self.num_rows = num_rows
- self.inpipe = FPADDInMuxPipe(num_rows, self.i_specfn) # fan-in
- self.outpipe = FPADDMuxOutPipe(num_rows, self.o_specfn) # fan-out
-
- self.p = self.inpipe.p # kinda annoying,
- self.n = self.outpipe.n # use pipe in/out as this class in/out
- self._ports = self.inpipe.ports() + self.outpipe.ports()
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.inpipe = self.inpipe
- m.submodules.alu = self.alu
- m.submodules.outpipe = self.outpipe
-
- m.d.comb += self.inpipe.n.connect_to_next(self.alu.p)
- m.d.comb += self.alu.connect_to_next(self.outpipe)
-
- return m
-
- def ports(self):
- return self._ports
-
-
+++ /dev/null
-from nmigen import *
-from nmigen.cli import main
-
-from nmigen_add_experiment import FPADD
-from fpbase import FPOp
-
-
-class Adder:
- def __init__(self, width):
- self.a = Signal(width)
- self.b = Signal(width)
- self.o = Signal(width)
-
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.o.eq(self.a + self.b)
- return m
-
-
-class Subtractor:
- def __init__(self, width):
- self.a = Signal(width)
- self.b = Signal(width)
- self.o = Signal(width)
-
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.o.eq(self.a - self.b)
- return m
-
-
-class ALU:
- def __init__(self, width):
- #self.op = Signal()
- self.a = FPOp(width)
- self.b = FPOp(width)
- self.c = FPOp(width)
- self.z = FPOp(width)
- self.int_stb = Signal()
-
- self.add1 = FPADD(width)
- self.add2 = FPADD(width)
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.add1 = self.add1
- m.submodules.add2 = self.add2
- # join add1 a to a: add1.in_a = a
- m.d.comb += self.add1.in_a.chain_from(self.a)
- # join add1 b to b: add1.in_b = b
- m.d.comb += self.add1.in_b.chain_from(self.b)
- # join add2 a to c: add2.in_a = c
- m.d.comb += self.add2.in_a.chain_from(self.c)
- # join add2 b to add1 z: add2.in_b = add1.out_z
- m.d.comb += self.add2.in_b.chain_inv(self.add1.out_z)
- # join output from add2 to z: z = add2.out_z
- m.d.comb += self.z.chain_from(self.add2.out_z)
- # get at add1's stb signal
- m.d.comb += self.int_stb.eq(self.add1.out_z.stb)
- #with m.If(self.op):
- # m.d.comb += self.o.eq(self.sub.o)
- #with m.Else():
- # m.d.comb += self.o.eq(self.add.o)
- return m
-
-
-if __name__ == "__main__":
- alu = ALU(width=16)
- main(alu, ports=alu.a.ports() + \
- alu.b.ports() + \
- alu.c.ports() + \
- alu.z.ports())
+++ /dev/null
-""" Pipeline and BufferedHandshake examples
-"""
-
-from nmoperator import eq
-from iocontrol import (PrevControl, NextControl)
-from singlepipe import (PrevControl, NextControl, ControlBase,
- StageCls, Stage, StageChain,
- BufferedHandshake, UnbufferedPipeline)
-
-from nmigen import Signal, Module
-from nmigen.cli import verilog, rtlil
-
-
-class ExampleAddStage(StageCls):
- """ an example of how to use the buffered pipeline, as a class instance
- """
-
- def ispec(self):
- """ returns a tuple of input signals which will be the incoming data
- """
- return (Signal(16), Signal(16))
-
- def ospec(self):
- """ returns an output signal which will happen to contain the sum
- of the two inputs
- """
- return Signal(16)
-
- def process(self, i):
- """ process the input data (sums the values in the tuple) and returns it
- """
- return i[0] + i[1]
-
-
-class ExampleBufPipeAdd(BufferedHandshake):
- """ an example of how to use the buffered pipeline, using a class instance
- """
-
- def __init__(self):
- addstage = ExampleAddStage()
- BufferedHandshake.__init__(self, addstage)
-
-
-class ExampleStage(Stage):
- """ an example of how to use the buffered pipeline, in a static class
- fashion
- """
-
- def ispec():
- return Signal(16, name="example_input_signal")
-
- def ospec():
- return Signal(16, name="example_output_signal")
-
- def process(i):
- """ process the input data and returns it (adds 1)
- """
- return i + 1
-
-
-class ExampleStageCls(StageCls):
- """ an example of how to use the buffered pipeline, in a static class
- fashion
- """
-
- def ispec(self):
- return Signal(16, name="example_input_signal")
-
- def ospec(self):
- return Signal(16, name="example_output_signal")
-
- def process(self, i):
- """ process the input data and returns it (adds 1)
- """
- return i + 1
-
-
-class ExampleBufPipe(BufferedHandshake):
- """ an example of how to use the buffered pipeline.
- """
-
- def __init__(self):
- BufferedHandshake.__init__(self, ExampleStage)
-
-
-class ExamplePipeline(UnbufferedPipeline):
- """ an example of how to use the unbuffered pipeline.
- """
-
- def __init__(self):
- UnbufferedPipeline.__init__(self, ExampleStage)
-
-
-if __name__ == '__main__':
- dut = ExampleBufPipe()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_bufpipe.il", "w") as f:
- f.write(vl)
-
- dut = ExamplePipeline()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_combpipe.il", "w") as f:
- f.write(vl)
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Cat
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase
-
-from singlepipe import eq
-
-
-class FPADD(FPBase):
-
- def __init__(self, width, single_cycle=False):
- FPBase.__init__(self)
- self.width = width
- self.single_cycle = single_cycle
-
- self.in_a = FPOp(width)
- self.in_b = FPOp(width)
- self.out_z = FPOp(width)
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for FPAdd
- """
- m = Module()
-
- # Latches
- a = FPNumIn(self.in_a, self.width)
- b = FPNumIn(self.in_b, self.width)
- z = FPNumOut(self.width, False)
-
- m.submodules.fpnum_a = a
- m.submodules.fpnum_b = b
- m.submodules.fpnum_z = z
-
- m.d.comb += a.v.eq(self.in_a.v)
- m.d.comb += b.v.eq(self.in_b.v)
-
- w = z.m_width + 4
- tot = Signal(w, reset_less=True) # sticky/round/guard, {mantissa} result, 1 overflow
-
- of = Overflow()
-
- m.submodules.overflow = of
-
- with m.FSM() as fsm:
-
- # ******
- # gets operand a
-
- with m.State("get_a"):
- res = self.get_op(m, self.in_a, a, "get_b")
- m.d.sync += eq([a, self.in_a.ack], res)
-
- # ******
- # gets operand b
-
- with m.State("get_b"):
- res = self.get_op(m, self.in_b, b, "special_cases")
- m.d.sync += eq([b, self.in_b.ack], res)
-
- # ******
- # special cases: NaNs, infs, zeros, denormalised
- # NOTE: some of these are unique to add. see "Special Operations"
- # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
-
- with m.State("special_cases"):
-
- s_nomatch = Signal()
- m.d.comb += s_nomatch.eq(a.s != b.s)
-
- m_match = Signal()
- m.d.comb += m_match.eq(a.m == b.m)
-
- # if a is NaN or b is NaN return NaN
- with m.If(a.is_nan | b.is_nan):
- m.next = "put_z"
- m.d.sync += z.nan(1)
-
- # XXX WEIRDNESS for FP16 non-canonical NaN handling
- # under review
-
- ## if a is zero and b is NaN return -b
- #with m.If(a.is_zero & (a.s==0) & b.is_nan):
- # m.next = "put_z"
- # m.d.sync += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
-
- ## if b is zero and a is NaN return -a
- #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
- # m.next = "put_z"
- # m.d.sync += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
-
- ## if a is -zero and b is NaN return -b
- #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
- # m.next = "put_z"
- # m.d.sync += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
-
- ## if b is -zero and a is NaN return -a
- #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
- # m.next = "put_z"
- # m.d.sync += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
-
- # if a is inf return inf (or NaN)
- with m.Elif(a.is_inf):
- m.next = "put_z"
- m.d.sync += z.inf(a.s)
- # if a is inf and signs don't match return NaN
- with m.If(b.exp_128 & s_nomatch):
- m.d.sync += z.nan(1)
-
- # if b is inf return inf
- with m.Elif(b.is_inf):
- m.next = "put_z"
- m.d.sync += z.inf(b.s)
-
- # if a is zero and b zero return signed-a/b
- with m.Elif(a.is_zero & b.is_zero):
- m.next = "put_z"
- m.d.sync += z.create(a.s & b.s, b.e, b.m[3:-1])
-
- # if a is zero return b
- with m.Elif(a.is_zero):
- m.next = "put_z"
- m.d.sync += z.create(b.s, b.e, b.m[3:-1])
-
- # if b is zero return a
- with m.Elif(b.is_zero):
- m.next = "put_z"
- m.d.sync += z.create(a.s, a.e, a.m[3:-1])
-
- # if a equal to -b return zero (+ve zero)
- with m.Elif(s_nomatch & m_match & (a.e == b.e)):
- m.next = "put_z"
- m.d.sync += z.zero(0)
-
- # Denormalised Number checks
- with m.Else():
- m.next = "align"
- self.denormalise(m, a)
- self.denormalise(m, b)
-
- # ******
- # align.
-
- with m.State("align"):
- if not self.single_cycle:
- # NOTE: this does *not* do single-cycle multi-shifting,
- # it *STAYS* in the align state until exponents match
-
- # exponent of a greater than b: shift b down
- with m.If(a.e > b.e):
- m.d.sync += b.shift_down()
- # exponent of b greater than a: shift a down
- with m.Elif(a.e < b.e):
- m.d.sync += a.shift_down()
- # exponents equal: move to next stage.
- with m.Else():
- m.next = "add_0"
- else:
- # This one however (single-cycle) will do the shift
- # in one go.
-
- # XXX TODO: the shifter used here is quite expensive
- # having only one would be better
-
- ediff = Signal((len(a.e), True), reset_less=True)
- ediffr = Signal((len(a.e), True), reset_less=True)
- m.d.comb += ediff.eq(a.e - b.e)
- m.d.comb += ediffr.eq(b.e - a.e)
- with m.If(ediff > 0):
- m.d.sync += b.shift_down_multi(ediff)
- # exponent of b greater than a: shift a down
- with m.Elif(ediff < 0):
- m.d.sync += a.shift_down_multi(ediffr)
-
- m.next = "add_0"
-
- # ******
- # First stage of add. covers same-sign (add) and subtract
- # special-casing when mantissas are greater or equal, to
- # give greatest accuracy.
-
- with m.State("add_0"):
- m.next = "add_1"
- m.d.sync += z.e.eq(a.e)
- # same-sign (both negative or both positive) add mantissas
- with m.If(a.s == b.s):
- m.d.sync += [
- tot.eq(Cat(a.m, 0) + Cat(b.m, 0)),
- z.s.eq(a.s)
- ]
- # a mantissa greater than b, use a
- with m.Elif(a.m >= b.m):
- m.d.sync += [
- tot.eq(Cat(a.m, 0) - Cat(b.m, 0)),
- z.s.eq(a.s)
- ]
- # b mantissa greater than a, use b
- with m.Else():
- m.d.sync += [
- tot.eq(Cat(b.m, 0) - Cat(a.m, 0)),
- z.s.eq(b.s)
- ]
-
- # ******
- # Second stage of add: preparation for normalisation.
- # detects when tot sum is too big (tot[27] is kinda a carry bit)
-
- with m.State("add_1"):
- m.next = "normalise_1"
- # tot[27] gets set when the sum overflows. shift result down
- with m.If(tot[-1]):
- m.d.sync += [
- z.m.eq(tot[4:]),
- of.m0.eq(tot[4]),
- of.guard.eq(tot[3]),
- of.round_bit.eq(tot[2]),
- of.sticky.eq(tot[1] | tot[0]),
- z.e.eq(z.e + 1)
- ]
- # tot[27] zero case
- with m.Else():
- m.d.sync += [
- z.m.eq(tot[3:]),
- of.m0.eq(tot[3]),
- of.guard.eq(tot[2]),
- of.round_bit.eq(tot[1]),
- of.sticky.eq(tot[0])
- ]
-
- # ******
- # First stage of normalisation.
-
- with m.State("normalise_1"):
- self.normalise_1(m, z, of, "normalise_2")
-
- # ******
- # Second stage of normalisation.
-
- with m.State("normalise_2"):
- self.normalise_2(m, z, of, "round")
-
- # ******
- # rounding stage
-
- with m.State("round"):
- self.roundz(m, z, of.roundz)
- m.next = "corrections"
-
- # ******
- # correction stage
-
- with m.State("corrections"):
- self.corrections(m, z, "pack")
-
- # ******
- # pack stage
-
- with m.State("pack"):
- self.pack(m, z, "put_z")
-
- # ******
- # put_z stage
-
- with m.State("put_z"):
- self.put_z(m, z, self.out_z, "get_a")
-
- return m
-
-
-if __name__ == "__main__":
- alu = FPADD(width=32)
- main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
-
-
- # works... but don't use, just do "python fname.py convert -t v"
- #print (verilog.convert(alu, ports=[
- # ports=alu.in_a.ports() + \
- # alu.in_b.ports() + \
- # alu.out_z.ports())
+++ /dev/null
-from nmigen import Module, Signal, Cat, Mux, Array, Const
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPState
-from fpcommon.getop import FPGetOp
-from singlepipe import eq
-
-
-class FPMUL(FPBase):
-
- def __init__(self, width):
- FPBase.__init__(self)
- self.width = width
-
- self.in_a = FPOp(width)
- self.in_b = FPOp(width)
- self.out_z = FPOp(width)
-
- self.states = []
-
- def add_state(self, state):
- self.states.append(state)
- return state
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for FPMUL
- """
- m = Module()
-
- # Latches
- a = FPNumIn(None, self.width, False)
- b = FPNumIn(None, self.width, False)
- z = FPNumOut(self.width, False)
-
- mw = (z.m_width)*2 - 1 + 3 # sticky/round/guard bits + (2*mant) - 1
- product = Signal(mw)
-
- of = Overflow()
- m.submodules.of = of
- m.submodules.a = a
- m.submodules.b = b
- m.submodules.z = z
-
- m.d.comb += a.v.eq(self.in_a.v)
- m.d.comb += b.v.eq(self.in_b.v)
-
- with m.FSM() as fsm:
-
- # ******
- # gets operand a
-
- with m.State("get_a"):
- res = self.get_op(m, self.in_a, a, "get_b")
- m.d.sync += eq([a, self.in_a.ack], res)
-
- # ******
- # gets operand b
-
- with m.State("get_b"):
- res = self.get_op(m, self.in_b, b, "special_cases")
- m.d.sync += eq([b, self.in_b.ack], res)
-
- # ******
- # special cases
-
- with m.State("special_cases"):
- #if a or b is NaN return NaN
- with m.If(a.is_nan | b.is_nan):
- m.next = "put_z"
- m.d.sync += z.nan(1)
- #if a is inf return inf
- with m.Elif(a.is_inf):
- m.next = "put_z"
- m.d.sync += z.inf(a.s ^ b.s)
- #if b is zero return NaN
- with m.If(b.is_zero):
- m.d.sync += z.nan(1)
- #if b is inf return inf
- with m.Elif(b.is_inf):
- m.next = "put_z"
- m.d.sync += z.inf(a.s ^ b.s)
- #if a is zero return NaN
- with m.If(a.is_zero):
- m.next = "put_z"
- m.d.sync += z.nan(1)
- #if a is zero return zero
- with m.Elif(a.is_zero):
- m.next = "put_z"
- m.d.sync += z.zero(a.s ^ b.s)
- #if b is zero return zero
- with m.Elif(b.is_zero):
- m.next = "put_z"
- m.d.sync += z.zero(a.s ^ b.s)
- # Denormalised Number checks
- with m.Else():
- m.next = "normalise_a"
- self.denormalise(m, a)
- self.denormalise(m, b)
-
- # ******
- # normalise_a
-
- with m.State("normalise_a"):
- self.op_normalise(m, a, "normalise_b")
-
- # ******
- # normalise_b
-
- with m.State("normalise_b"):
- self.op_normalise(m, b, "multiply_0")
-
- #multiply_0
- with m.State("multiply_0"):
- m.next = "multiply_1"
- m.d.sync += [
- z.s.eq(a.s ^ b.s),
- z.e.eq(a.e + b.e + 1),
- product.eq(a.m * b.m * 4)
- ]
-
- #multiply_1
- with m.State("multiply_1"):
- mw = z.m_width
- m.next = "normalise_1"
- m.d.sync += [
- z.m.eq(product[mw+2:]),
- of.guard.eq(product[mw+1]),
- of.round_bit.eq(product[mw]),
- of.sticky.eq(product[0:mw] != 0)
- ]
-
- # ******
- # First stage of normalisation.
- with m.State("normalise_1"):
- self.normalise_1(m, z, of, "normalise_2")
-
- # ******
- # Second stage of normalisation.
-
- with m.State("normalise_2"):
- self.normalise_2(m, z, of, "round")
-
- # ******
- # rounding stage
-
- with m.State("round"):
- self.roundz(m, z, of.roundz)
- m.next = "corrections"
-
- # ******
- # correction stage
-
- with m.State("corrections"):
- self.corrections(m, z, "pack")
-
- # ******
- # pack stage
- with m.State("pack"):
- self.pack(m, z, "put_z")
-
- # ******
- # put_z stage
-
- with m.State("put_z"):
- self.put_z(m, z, self.out_z, "get_a")
-
- return m
-
-
-if __name__ == "__main__":
- alu = FPMUL(width=32)
- main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Cat, Elaboratable
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumBase
-from fpbase import FPState
-from fpcommon.denorm import FPSCData
-
-
-class FPAddStage0Data:
-
- def __init__(self, width, id_wid):
- self.z = FPNumBase(width, False)
- self.out_do_z = Signal(reset_less=True)
- self.oz = Signal(width, reset_less=True)
- self.tot = Signal(self.z.m_width + 4, reset_less=True)
- self.mid = Signal(id_wid, reset_less=True)
-
- def eq(self, i):
- return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
- self.tot.eq(i.tot), self.mid.eq(i.mid)]
-
-
-class FPAddStage0Mod(Elaboratable):
-
- def __init__(self, width, id_wid):
- self.width = width
- self.id_wid = id_wid
- self.i = self.ispec()
- self.o = self.ospec()
-
- def ispec(self):
- return FPSCData(self.width, self.id_wid)
-
- def ospec(self):
- return FPAddStage0Data(self.width, self.id_wid)
-
- def process(self, i):
- return self.o
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- m.submodules.add0 = self
- m.d.comb += self.i.eq(i)
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.add0_in_a = self.i.a
- m.submodules.add0_in_b = self.i.b
- m.submodules.add0_out_z = self.o.z
-
- # store intermediate tests (and zero-extended mantissas)
- seq = Signal(reset_less=True)
- mge = Signal(reset_less=True)
- am0 = Signal(len(self.i.a.m)+1, reset_less=True)
- bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
- m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
- mge.eq(self.i.a.m >= self.i.b.m),
- am0.eq(Cat(self.i.a.m, 0)),
- bm0.eq(Cat(self.i.b.m, 0))
- ]
- # same-sign (both negative or both positive) add mantissas
- with m.If(~self.i.out_do_z):
- m.d.comb += self.o.z.e.eq(self.i.a.e)
- with m.If(seq):
- m.d.comb += [
- self.o.tot.eq(am0 + bm0),
- self.o.z.s.eq(self.i.a.s)
- ]
- # a mantissa greater than b, use a
- with m.Elif(mge):
- m.d.comb += [
- self.o.tot.eq(am0 - bm0),
- self.o.z.s.eq(self.i.a.s)
- ]
- # b mantissa greater than a, use b
- with m.Else():
- m.d.comb += [
- self.o.tot.eq(bm0 - am0),
- self.o.z.s.eq(self.i.b.s)
- ]
-
- m.d.comb += self.o.oz.eq(self.i.oz)
- m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
- m.d.comb += self.o.mid.eq(self.i.mid)
- return m
-
-
-class FPAddStage0(FPState):
- """ First stage of add. covers same-sign (add) and subtract
- special-casing when mantissas are greater or equal, to
- give greatest accuracy.
- """
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "add_0")
- self.mod = FPAddStage0Mod(width)
- self.o = self.mod.ospec()
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- self.mod.setup(m, i)
-
- # NOTE: these could be done as combinatorial (merge add0+add1)
- m.d.sync += self.o.eq(self.mod.o)
-
- def action(self, m):
- m.next = "add_1"
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main, verilog
-from math import log
-
-from fpbase import FPState
-from fpcommon.postcalc import FPAddStage1Data
-from fpadd.add0 import FPAddStage0Data
-
-
-class FPAddStage1Mod(FPState, Elaboratable):
- """ Second stage of add: preparation for normalisation.
- detects when tot sum is too big (tot[27] is kinda a carry bit)
- """
-
- def __init__(self, width, id_wid):
- self.width = width
- self.id_wid = id_wid
- self.i = self.ispec()
- self.o = self.ospec()
-
- def ispec(self):
- return FPAddStage0Data(self.width, self.id_wid)
-
- def ospec(self):
- return FPAddStage1Data(self.width, self.id_wid)
-
- def process(self, i):
- return self.o
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- m.submodules.add1 = self
- m.submodules.add1_out_overflow = self.o.of
-
- m.d.comb += self.i.eq(i)
-
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.o.z.eq(self.i.z)
- # tot[-1] (MSB) gets set when the sum overflows. shift result down
- with m.If(~self.i.out_do_z):
- with m.If(self.i.tot[-1]):
- m.d.comb += [
- self.o.z.m.eq(self.i.tot[4:]),
- self.o.of.m0.eq(self.i.tot[4]),
- self.o.of.guard.eq(self.i.tot[3]),
- self.o.of.round_bit.eq(self.i.tot[2]),
- self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
- self.o.z.e.eq(self.i.z.e + 1)
- ]
- # tot[-1] (MSB) zero case
- with m.Else():
- m.d.comb += [
- self.o.z.m.eq(self.i.tot[3:]),
- self.o.of.m0.eq(self.i.tot[3]),
- self.o.of.guard.eq(self.i.tot[2]),
- self.o.of.round_bit.eq(self.i.tot[1]),
- self.o.of.sticky.eq(self.i.tot[0])
- ]
-
- m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
- m.d.comb += self.o.oz.eq(self.i.oz)
- m.d.comb += self.o.mid.eq(self.i.mid)
-
- return m
-
-
-class FPAddStage1(FPState):
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "add_1")
- self.mod = FPAddStage1Mod(width)
- self.out_z = FPNumBase(width, False)
- self.out_of = Overflow()
- self.norm_stb = Signal()
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- self.mod.setup(m, i)
-
- m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
-
- m.d.sync += self.out_of.eq(self.mod.out_of)
- m.d.sync += self.out_z.eq(self.mod.out_z)
- m.d.sync += self.norm_stb.eq(1)
-
- def action(self, m):
- m.next = "normalise_1"
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module
-from nmigen.cli import main, verilog
-
-from singlepipe import (StageChain, SimpleHandshake,
- PassThroughStage)
-
-from fpbase import FPState
-from fpcommon.denorm import FPSCData
-from fpcommon.postcalc import FPAddStage1Data
-from fpadd.align import FPAddAlignSingleMod
-from fpadd.add0 import FPAddStage0Mod
-from fpadd.add1 import FPAddStage1Mod
-
-
-class FPAddAlignSingleAdd(FPState, SimpleHandshake):
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "align")
- self.width = width
- self.id_wid = id_wid
- SimpleHandshake.__init__(self, self) # pipeline is its own stage
- self.a1o = self.ospec()
-
- def ispec(self):
- return FPSCData(self.width, self.id_wid)
-
- def ospec(self):
- return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
-
- # chain AddAlignSingle, AddStage0 and AddStage1
- mod = FPAddAlignSingleMod(self.width, self.id_wid)
- a0mod = FPAddStage0Mod(self.width, self.id_wid)
- a1mod = FPAddStage1Mod(self.width, self.id_wid)
-
- chain = StageChain([mod, a0mod, a1mod])
- chain.setup(m, i)
-
- self.o = a1mod.o
-
- def process(self, i):
- return self.o
-
- def action(self, m):
- m.d.sync += self.a1o.eq(self.process(None))
- m.next = "normalise_1"
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumOut, FPNumIn, FPNumBase
-from fpbase import MultiShiftRMerge
-from fpbase import FPState
-from fpcommon.denorm import FPSCData
-
-
-class FPNumIn2Ops:
-
- def __init__(self, width, id_wid):
- self.a = FPNumIn(None, width)
- self.b = FPNumIn(None, width)
- self.z = FPNumOut(width, False)
- self.out_do_z = Signal(reset_less=True)
- self.oz = Signal(width, reset_less=True)
- self.mid = Signal(id_wid, reset_less=True)
-
- def eq(self, i):
- return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
- self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
-
-
-
-class FPAddAlignMultiMod(FPState):
-
- def __init__(self, width):
- self.in_a = FPNumBase(width)
- self.in_b = FPNumBase(width)
- self.out_a = FPNumIn(None, width)
- self.out_b = FPNumIn(None, width)
- self.exp_eq = Signal(reset_less=True)
-
- def elaborate(self, platform):
- # This one however (single-cycle) will do the shift
- # in one go.
-
- m = Module()
-
- m.submodules.align_in_a = self.in_a
- m.submodules.align_in_b = self.in_b
- m.submodules.align_out_a = self.out_a
- m.submodules.align_out_b = self.out_b
-
- # NOTE: this does *not* do single-cycle multi-shifting,
- # it *STAYS* in the align state until exponents match
-
- # exponent of a greater than b: shift b down
- m.d.comb += self.exp_eq.eq(0)
- m.d.comb += self.out_a.eq(self.in_a)
- m.d.comb += self.out_b.eq(self.in_b)
- agtb = Signal(reset_less=True)
- altb = Signal(reset_less=True)
- m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
- m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
- with m.If(agtb):
- m.d.comb += self.out_b.shift_down(self.in_b)
- # exponent of b greater than a: shift a down
- with m.Elif(altb):
- m.d.comb += self.out_a.shift_down(self.in_a)
- # exponents equal: move to next stage.
- with m.Else():
- m.d.comb += self.exp_eq.eq(1)
- return m
-
-
-class FPAddAlignMulti(FPState):
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "align")
- self.mod = FPAddAlignMultiMod(width)
- self.out_a = FPNumIn(None, width)
- self.out_b = FPNumIn(None, width)
- self.exp_eq = Signal(reset_less=True)
-
- def setup(self, m, in_a, in_b):
- """ links module to inputs and outputs
- """
- m.submodules.align = self.mod
- m.d.comb += self.mod.in_a.eq(in_a)
- m.d.comb += self.mod.in_b.eq(in_b)
- m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
- m.d.sync += self.out_a.eq(self.mod.out_a)
- m.d.sync += self.out_b.eq(self.mod.out_b)
-
- def action(self, m):
- with m.If(self.exp_eq):
- m.next = "add_0"
-
-
-class FPAddAlignSingleMod:
-
- def __init__(self, width, id_wid):
- self.width = width
- self.id_wid = id_wid
- self.i = self.ispec()
- self.o = self.ospec()
-
- def ispec(self):
- return FPSCData(self.width, self.id_wid)
-
- def ospec(self):
- return FPNumIn2Ops(self.width, self.id_wid)
-
- def process(self, i):
- return self.o
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- m.submodules.align = self
- m.d.comb += self.i.eq(i)
-
- def elaborate(self, platform):
- """ Aligns A against B or B against A, depending on which has the
- greater exponent. This is done in a *single* cycle using
- variable-width bit-shift
-
- the shifter used here is quite expensive in terms of gates.
- Mux A or B in (and out) into temporaries, as only one of them
- needs to be aligned against the other
- """
- m = Module()
-
- m.submodules.align_in_a = self.i.a
- m.submodules.align_in_b = self.i.b
- m.submodules.align_out_a = self.o.a
- m.submodules.align_out_b = self.o.b
-
- # temporary (muxed) input and output to be shifted
- t_inp = FPNumBase(self.width)
- t_out = FPNumIn(None, self.width)
- espec = (len(self.i.a.e), True)
- msr = MultiShiftRMerge(self.i.a.m_width, espec)
- m.submodules.align_t_in = t_inp
- m.submodules.align_t_out = t_out
- m.submodules.multishift_r = msr
-
- ediff = Signal(espec, reset_less=True)
- ediffr = Signal(espec, reset_less=True)
- tdiff = Signal(espec, reset_less=True)
- elz = Signal(reset_less=True)
- egz = Signal(reset_less=True)
-
- # connect multi-shifter to t_inp/out mantissa (and tdiff)
- m.d.comb += msr.inp.eq(t_inp.m)
- m.d.comb += msr.diff.eq(tdiff)
- m.d.comb += t_out.m.eq(msr.m)
- m.d.comb += t_out.e.eq(t_inp.e + tdiff)
- m.d.comb += t_out.s.eq(t_inp.s)
-
- m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
- m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
- m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
- m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
-
- # default: A-exp == B-exp, A and B untouched (fall through)
- m.d.comb += self.o.a.eq(self.i.a)
- m.d.comb += self.o.b.eq(self.i.b)
- # only one shifter (muxed)
- #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
- # exponent of a greater than b: shift b down
- with m.If(~self.i.out_do_z):
- with m.If(egz):
- m.d.comb += [t_inp.eq(self.i.b),
- tdiff.eq(ediff),
- self.o.b.eq(t_out),
- self.o.b.s.eq(self.i.b.s), # whoops forgot sign
- ]
- # exponent of b greater than a: shift a down
- with m.Elif(elz):
- m.d.comb += [t_inp.eq(self.i.a),
- tdiff.eq(ediffr),
- self.o.a.eq(t_out),
- self.o.a.s.eq(self.i.a.s), # whoops forgot sign
- ]
-
- m.d.comb += self.o.mid.eq(self.i.mid)
- m.d.comb += self.o.z.eq(self.i.z)
- m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
- m.d.comb += self.o.oz.eq(self.i.oz)
-
- return m
-
-
-class FPAddAlignSingle(FPState):
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "align")
- self.mod = FPAddAlignSingleMod(width, id_wid)
- self.out_a = FPNumIn(None, width)
- self.out_b = FPNumIn(None, width)
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- self.mod.setup(m, i)
-
- # NOTE: could be done as comb
- m.d.sync += self.out_a.eq(self.mod.out_a)
- m.d.sync += self.out_b.eq(self.mod.out_b)
-
- def action(self, m):
- m.next = "add_0"
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module
-from nmigen.cli import main, verilog
-
-from singlepipe import (ControlBase, SimpleHandshake, PassThroughStage)
-from multipipe import CombMuxOutPipe
-from multipipe import PriorityCombMuxInPipe
-
-from fpcommon.getop import FPADDBaseData
-from fpcommon.denorm import FPSCData
-from fpcommon.pack import FPPackData
-from fpcommon.normtopack import FPNormToPack
-from fpadd.specialcases import FPAddSpecialCasesDeNorm
-from fpadd.addstages import FPAddAlignSingleAdd
-
-from concurrentunit import ReservationStations, num_bits
-
-
-class FPADDBasePipe(ControlBase):
- def __init__(self, width, id_wid):
- ControlBase.__init__(self)
- self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
- self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
- self.pipe3 = FPNormToPack(width, id_wid)
-
- self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
- m.submodules.scnorm = self.pipe1
- m.submodules.addalign = self.pipe2
- m.submodules.normpack = self.pipe3
- m.d.comb += self._eqs
- return m
-
-
-class FPADDMuxInOut(ReservationStations):
- """ Reservation-Station version of FPADD pipeline.
-
- * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
- * 3-stage adder pipeline
- * fan-out on outputs (an array of FPPackData: z,mid)
-
- Fan-in and Fan-out are combinatorial.
- """
- def __init__(self, width, num_rows):
- self.width = width
- self.id_wid = num_bits(width)
- self.alu = FPADDBasePipe(width, self.id_wid)
- ReservationStations.__init__(self, num_rows)
-
- def i_specfn(self):
- return FPADDBaseData(self.width, self.id_wid)
-
- def o_specfn(self):
- return FPPackData(self.width, self.id_wid)
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Cat, Const
-from nmigen.cli import main, verilog
-from math import log
-
-from fpbase import FPNumDecode
-from singlepipe import SimpleHandshake, StageChain
-
-from fpbase import FPState, FPID
-from fpcommon.getop import FPADDBaseData
-from fpcommon.denorm import (FPSCData, FPAddDeNormMod)
-
-
-class FPAddSpecialCasesMod:
- """ special cases: NaNs, infs, zeros, denormalised
- NOTE: some of these are unique to add. see "Special Operations"
- https://steve.hollasch.net/cgindex/coding/ieeefloat.html
- """
-
- def __init__(self, width, id_wid):
- self.width = width
- self.id_wid = id_wid
- self.i = self.ispec()
- self.o = self.ospec()
-
- def ispec(self):
- return FPADDBaseData(self.width, self.id_wid)
-
- def ospec(self):
- return FPSCData(self.width, self.id_wid)
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- m.submodules.specialcases = self
- m.d.comb += self.i.eq(i)
-
- def process(self, i):
- return self.o
-
- def elaborate(self, platform):
- m = Module()
-
- m.submodules.sc_out_z = self.o.z
-
- # decode: XXX really should move to separate stage
- a1 = FPNumDecode(None, self.width)
- b1 = FPNumDecode(None, self.width)
- m.submodules.sc_decode_a = a1
- m.submodules.sc_decode_b = b1
- m.d.comb += [a1.v.eq(self.i.a),
- b1.v.eq(self.i.b),
- self.o.a.eq(a1),
- self.o.b.eq(b1)
- ]
-
- s_nomatch = Signal(reset_less=True)
- m.d.comb += s_nomatch.eq(a1.s != b1.s)
-
- m_match = Signal(reset_less=True)
- m.d.comb += m_match.eq(a1.m == b1.m)
-
- e_match = Signal(reset_less=True)
- m.d.comb += e_match.eq(a1.e == b1.e)
-
- aeqmb = Signal(reset_less=True)
- m.d.comb += aeqmb.eq(s_nomatch & m_match & e_match)
-
- abz = Signal(reset_less=True)
- m.d.comb += abz.eq(a1.is_zero & b1.is_zero)
-
- abnan = Signal(reset_less=True)
- m.d.comb += abnan.eq(a1.is_nan | b1.is_nan)
-
- bexp128s = Signal(reset_less=True)
- m.d.comb += bexp128s.eq(b1.exp_128 & s_nomatch)
-
- # if a is NaN or b is NaN return NaN
- with m.If(abnan):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.nan(0)
-
- # XXX WEIRDNESS for FP16 non-canonical NaN handling
- # under review
-
- ## if a is zero and b is NaN return -b
- #with m.If(a.is_zero & (a.s==0) & b.is_nan):
- # m.d.comb += self.o.out_do_z.eq(1)
- # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
-
- ## if b is zero and a is NaN return -a
- #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
- # m.d.comb += self.o.out_do_z.eq(1)
- # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
-
- ## if a is -zero and b is NaN return -b
- #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
- # m.d.comb += self.o.out_do_z.eq(1)
- # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
-
- ## if b is -zero and a is NaN return -a
- #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
- # m.d.comb += self.o.out_do_z.eq(1)
- # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
-
- # if a is inf return inf (or NaN)
- with m.Elif(a1.is_inf):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.inf(a1.s)
- # if a is inf and signs don't match return NaN
- with m.If(bexp128s):
- m.d.comb += self.o.z.nan(0)
-
- # if b is inf return inf
- with m.Elif(b1.is_inf):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.inf(b1.s)
-
- # if a is zero and b zero return signed-a/b
- with m.Elif(abz):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
-
- # if a is zero return b
- with m.Elif(a1.is_zero):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
-
- # if b is zero return a
- with m.Elif(b1.is_zero):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
-
- # if a equal to -b return zero (+ve zero)
- with m.Elif(aeqmb):
- m.d.comb += self.o.out_do_z.eq(1)
- m.d.comb += self.o.z.zero(0)
-
- # Denormalised Number checks next, so pass a/b data through
- with m.Else():
- m.d.comb += self.o.out_do_z.eq(0)
-
- m.d.comb += self.o.oz.eq(self.o.z.v)
- m.d.comb += self.o.mid.eq(self.i.mid)
-
- return m
-
-
-class FPAddSpecialCases(FPState):
- """ special cases: NaNs, infs, zeros, denormalised
- NOTE: some of these are unique to add. see "Special Operations"
- https://steve.hollasch.net/cgindex/coding/ieeefloat.html
- """
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "special_cases")
- self.mod = FPAddSpecialCasesMod(width)
- self.out_z = self.mod.ospec()
- self.out_do_z = Signal(reset_less=True)
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- self.mod.setup(m, i, self.out_do_z)
- m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
- m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)
-
- def action(self, m):
- self.idsync(m)
- with m.If(self.out_do_z):
- m.next = "put_z"
- with m.Else():
- m.next = "denormalise"
-
-
-class FPAddSpecialCasesDeNorm(FPState, SimpleHandshake):
- """ special cases: NaNs, infs, zeros, denormalised
- NOTE: some of these are unique to add. see "Special Operations"
- https://steve.hollasch.net/cgindex/coding/ieeefloat.html
- """
-
- def __init__(self, width, id_wid):
- FPState.__init__(self, "special_cases")
- self.width = width
- self.id_wid = id_wid
- SimpleHandshake.__init__(self, self) # pipe is its own stage
- self.out = self.ospec()
-
- def ispec(self):
- return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
-
- def ospec(self):
- return FPSCData(self.width, self.id_wid) # DeNorm ospec
-
- def setup(self, m, i):
- """ links module to inputs and outputs
- """
- smod = FPAddSpecialCasesMod(self.width, self.id_wid)
- dmod = FPAddDeNormMod(self.width, self.id_wid)
-
- chain = StageChain([smod, dmod])
- chain.setup(m, i)
-
- # only needed for break-out (early-out)
- # self.out_do_z = smod.o.out_do_z
-
- self.o = dmod.o
-
- def process(self, i):
- return self.o
-
- def action(self, m):
- # for break-out (early-out)
- #with m.If(self.out_do_z):
- # m.next = "put_z"
- #with m.Else():
- m.d.sync += self.out.eq(self.process(None))
- m.next = "align"
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Cat, Mux, Array, Const
-from nmigen.cli import main, verilog
-from math import log
-
-from fpbase import FPOpIn, FPOpOut
-from fpbase import Trigger
-from singlepipe import (StageChain, SimpleHandshake)
-
-from fpbase import FPState, FPID
-from fpcommon.getop import (FPGetOp, FPADDBaseData, FPGet2Op)
-from fpcommon.denorm import (FPSCData, FPAddDeNorm)
-from fpcommon.postcalc import FPAddStage1Data
-from fpcommon.postnormalise import (FPNorm1Data,
- FPNorm1Single, FPNorm1Multi)
-from fpcommon.roundz import (FPRoundData, FPRound)
-from fpcommon.corrections import FPCorrections
-from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
-from fpcommon.normtopack import FPNormToPack
-from fpcommon.putz import (FPPutZ, FPPutZIdx)
-
-from fpadd.specialcases import (FPAddSpecialCases, FPAddSpecialCasesDeNorm)
-from fpadd.align import (FPAddAlignMulti, FPAddAlignSingle)
-from fpadd.add0 import (FPAddStage0Data, FPAddStage0)
-from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
-from fpadd.addstages import FPAddAlignSingleAdd
-
-
-class FPOpData:
- def __init__(self, width, id_wid):
- self.z = FPOpOut(width)
- self.z.data_o = Signal(width)
- self.mid = Signal(id_wid, reset_less=True)
-
- def __iter__(self):
- yield self.z
- yield self.mid
-
- def eq(self, i):
- return [self.z.eq(i.z), self.mid.eq(i.mid)]
-
- def ports(self):
- return list(self)
-
-
-class FPADDBaseMod:
-
- def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
- """ IEEE754 FP Add
-
- * width: bit-width of IEEE754. supported: 16, 32, 64
- * id_wid: an identifier that is sync-connected to the input
- * single_cycle: True indicates each stage to complete in 1 clock
- * compact: True indicates a reduced number of stages
- """
- self.width = width
- self.id_wid = id_wid
- self.single_cycle = single_cycle
- self.compact = compact
-
- self.in_t = Trigger()
- self.i = self.ispec()
- self.o = self.ospec()
-
- self.states = []
-
- def ispec(self):
- return FPADDBaseData(self.width, self.id_wid)
-
- def ospec(self):
- return FPOpData(self.width, self.id_wid)
-
- def add_state(self, state):
- self.states.append(state)
- return state
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for FPAdd
- """
- m = Module()
- m.submodules.out_z = self.o.z
- m.submodules.in_t = self.in_t
- if self.compact:
- self.get_compact_fragment(m, platform)
- else:
- self.get_longer_fragment(m, platform)
-
- with m.FSM() as fsm:
-
- for state in self.states:
- with m.State(state.state_from):
- state.action(m)
-
- return m
-
- def get_longer_fragment(self, m, platform=None):
-
- get = self.add_state(FPGet2Op("get_ops", "special_cases",
- self.width))
- get.setup(m, self.i)
- a = get.out_op1
- b = get.out_op2
- get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
-
- sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
- sc.setup(m, a, b, self.in_mid)
-
- dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
- dn.setup(m, a, b, sc.in_mid)
-
- if self.single_cycle:
- alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
- alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
- else:
- alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
- alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
-
- add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
- add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
-
- add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
- add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
-
- if self.single_cycle:
- n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
- n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
- else:
- n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
- n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)
-
- rn = self.add_state(FPRound(self.width, self.id_wid))
- rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)
-
- cor = self.add_state(FPCorrections(self.width, self.id_wid))
- cor.setup(m, rn.out_z, rn.in_mid)
-
- pa = self.add_state(FPPack(self.width, self.id_wid))
- pa.setup(m, cor.out_z, rn.in_mid)
-
- ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
- pa.in_mid, self.out_mid))
-
- pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
- pa.in_mid, self.out_mid))
-
- def get_compact_fragment(self, m, platform=None):
-
- get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
- sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
- alm = FPAddAlignSingleAdd(self.width, self.id_wid)
- n1 = FPNormToPack(self.width, self.id_wid)
-
- get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
-
- chainlist = [get, sc, alm, n1]
- chain = StageChain(chainlist, specallocate=True)
- chain.setup(m, self.i)
-
- for mod in chainlist:
- sc = self.add_state(mod)
-
- ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
- n1.out_z.mid, self.o.mid))
-
- #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
- # sc.o.mid, self.o.mid))
-
-
-class FPADDBase(FPState):
-
- def __init__(self, width, id_wid=None, single_cycle=False):
- """ IEEE754 FP Add
-
- * width: bit-width of IEEE754. supported: 16, 32, 64
- * id_wid: an identifier that is sync-connected to the input
- * single_cycle: True indicates each stage to complete in 1 clock
- """
- FPState.__init__(self, "fpadd")
- self.width = width
- self.single_cycle = single_cycle
- self.mod = FPADDBaseMod(width, id_wid, single_cycle)
- self.o = self.ospec()
-
- self.in_t = Trigger()
- self.i = self.ispec()
-
- self.z_done = Signal(reset_less=True) # connects to out_z Strobe
- self.in_accept = Signal(reset_less=True)
- self.add_stb = Signal(reset_less=True)
- self.add_ack = Signal(reset=0, reset_less=True)
-
- def ispec(self):
- return self.mod.ispec()
-
- def ospec(self):
- return self.mod.ospec()
-
- def setup(self, m, i, add_stb, in_mid):
- m.d.comb += [self.i.eq(i),
- self.mod.i.eq(self.i),
- self.z_done.eq(self.mod.o.z.trigger),
- #self.add_stb.eq(add_stb),
- self.mod.in_t.stb.eq(self.in_t.stb),
- self.in_t.ack.eq(self.mod.in_t.ack),
- self.o.mid.eq(self.mod.o.mid),
- self.o.z.v.eq(self.mod.o.z.v),
- self.o.z.valid_o.eq(self.mod.o.z.valid_o),
- self.mod.o.z.ready_i.eq(self.o.z.ready_i_test),
- ]
-
- m.d.sync += self.add_stb.eq(add_stb)
- m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
- m.d.sync += self.o.z.ready_i.eq(0) # likewise
- #m.d.sync += self.in_t.stb.eq(0)
-
- m.submodules.fpadd = self.mod
-
- def action(self, m):
-
- # in_accept is set on incoming strobe HIGH and ack LOW.
- m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
-
- #with m.If(self.in_t.ack):
- # m.d.sync += self.in_t.stb.eq(0)
- with m.If(~self.z_done):
- # not done: test for accepting an incoming operand pair
- with m.If(self.in_accept):
- m.d.sync += [
- self.add_ack.eq(1), # acknowledge receipt...
- self.in_t.stb.eq(1), # initiate add
- ]
- with m.Else():
- m.d.sync += [self.add_ack.eq(0),
- self.in_t.stb.eq(0),
- self.o.z.ready_i.eq(1),
- ]
- with m.Else():
- # done: acknowledge, and write out id and value
- m.d.sync += [self.add_ack.eq(1),
- self.in_t.stb.eq(0)
- ]
- m.next = "put_z"
-
- return
-
- if self.in_mid is not None:
- m.d.sync += self.out_mid.eq(self.mod.out_mid)
-
- m.d.sync += [
- self.out_z.v.eq(self.mod.out_z.v)
- ]
- # move to output state on detecting z ack
- with m.If(self.out_z.trigger):
- m.d.sync += self.out_z.stb.eq(0)
- m.next = "put_z"
- with m.Else():
- m.d.sync += self.out_z.stb.eq(1)
-
-
-class FPADD(FPID):
- """ FPADD: stages as follows:
-
- FPGetOp (a)
- |
- FPGetOp (b)
- |
- FPAddBase---> FPAddBaseMod
- | |
- PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
-
- FPAddBase is tricky: it is both a stage and *has* stages.
- Connection to FPAddBaseMod therefore requires an in stb/ack
- and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
- needs to be the thing that raises the incoming stb.
- """
-
- def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
- """ IEEE754 FP Add
-
- * width: bit-width of IEEE754. supported: 16, 32, 64
- * id_wid: an identifier that is sync-connected to the input
- * single_cycle: True indicates each stage to complete in 1 clock
- """
- self.width = width
- self.id_wid = id_wid
- self.single_cycle = single_cycle
-
- #self.out_z = FPOp(width)
- self.ids = FPID(id_wid)
-
- rs = []
- for i in range(rs_sz):
- in_a = FPOpIn(width)
- in_b = FPOpIn(width)
- in_a.data_i = Signal(width)
- in_b.data_i = Signal(width)
- in_a.name = "in_a_%d" % i
- in_b.name = "in_b_%d" % i
- rs.append((in_a, in_b))
- self.rs = Array(rs)
-
- res = []
- for i in range(rs_sz):
- out_z = FPOpOut(width)
- out_z.data_o = Signal(width)
- out_z.name = "out_z_%d" % i
- res.append(out_z)
- self.res = Array(res)
-
- self.states = []
-
- def add_state(self, state):
- self.states.append(state)
- return state
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for FPAdd
- """
- m = Module()
- #m.submodules += self.rs
-
- in_a = self.rs[0][0]
- in_b = self.rs[0][1]
-
- geta = self.add_state(FPGetOp("get_a", "get_b",
- in_a, self.width))
- geta.setup(m, in_a)
- a = geta.out_op
-
- getb = self.add_state(FPGetOp("get_b", "fpadd",
- in_b, self.width))
- getb.setup(m, in_b)
- b = getb.out_op
-
- ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
- ab = self.add_state(ab)
- abd = ab.ispec() # create an input spec object for FPADDBase
- m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
- ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
- o = ab.o
-
- pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
- o.mid, "get_a"))
-
- with m.FSM() as fsm:
-
- for state in self.states:
- with m.State(state.state_from):
- state.action(m)
-
- return m
-
-
-if __name__ == "__main__":
- if True:
- alu = FPADD(width=32, id_wid=5, single_cycle=True)
- main(alu, ports=alu.rs[0][0].ports() + \
- alu.rs[0][1].ports() + \
- alu.res[0].ports() + \
- [alu.ids.in_mid, alu.ids.out_mid])
- else:
- alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
- main(alu, ports=[alu.in_a, alu.in_b] + \
- alu.in_t.ports() + \
- alu.out_z.ports() + \
- [alu.in_mid, alu.out_mid])
-
-
- # works... but don't use, just do "python fname.py convert -t v"
- #print (verilog.convert(alu, ports=[
- # ports=alu.in_a.ports() + \
- # alu.in_b.ports() + \
- # alu.out_z.ports())
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable
-from math import log
-from operator import or_
-from functools import reduce
-
-from singlepipe import PrevControl, NextControl
-from pipeline import ObjectProxy
-
-
-class MultiShiftR:
-
- def __init__(self, width):
- self.width = width
- self.smax = int(log(width) / log(2))
- self.i = Signal(width, reset_less=True)
- self.s = Signal(self.smax, reset_less=True)
- self.o = Signal(width, reset_less=True)
-
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.o.eq(self.i >> self.s)
- return m
-
-
-class MultiShift:
- """ Generates variable-length single-cycle shifter from a series
- of conditional tests on each bit of the left/right shift operand.
- Each bit tested produces output shifted by that number of bits,
- in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
- shifts by 2 bits, each partial result cascading to the next Mux.
-
- Could be adapted to do arithmetic shift by taking copies of the
- MSB instead of zeros.
- """
-
- def __init__(self, width):
- self.width = width
- self.smax = int(log(width) / log(2))
-
- def lshift(self, op, s):
- res = op << s
- return res[:len(op)]
- res = op
- for i in range(self.smax):
- zeros = [0] * (1<<i)
- res = Mux(s & (1<<i), Cat(zeros, res[0:-(1<<i)]), res)
- return res
-
- def rshift(self, op, s):
- res = op >> s
- return res[:len(op)]
- res = op
- for i in range(self.smax):
- zeros = [0] * (1<<i)
- res = Mux(s & (1<<i), Cat(res[(1<<i):], zeros), res)
- return res
-
-
-class FPNumBase: #(Elaboratable):
- """ Floating-point Base Number Class
- """
- def __init__(self, width, m_extra=True):
- self.width = width
- m_width = {16: 11, 32: 24, 64: 53}[width] # 1 extra bit (overflow)
- e_width = {16: 7, 32: 10, 64: 13}[width] # 2 extra bits (overflow)
- e_max = 1<<(e_width-3)
- self.rmw = m_width # real mantissa width (not including extras)
- self.e_max = e_max
- if m_extra:
- # mantissa extra bits (top,guard,round)
- self.m_extra = 3
- m_width += self.m_extra
- else:
- self.m_extra = 0
- #print (m_width, e_width, e_max, self.rmw, self.m_extra)
- self.m_width = m_width
- self.e_width = e_width
- self.e_start = self.rmw - 1
- self.e_end = self.rmw + self.e_width - 3 # for decoding
-
- self.v = Signal(width, reset_less=True) # Latched copy of value
- self.m = Signal(m_width, reset_less=True) # Mantissa
- self.e = Signal((e_width, True), reset_less=True) # Exponent: IEEE754exp+2 bits, signed
- self.s = Signal(reset_less=True) # Sign bit
-
- self.mzero = Const(0, (m_width, False))
- m_msb = 1<<(self.m_width-2)
- self.msb1 = Const(m_msb, (m_width, False))
- self.m1s = Const(-1, (m_width, False))
- self.P128 = Const(e_max, (e_width, True))
- self.P127 = Const(e_max-1, (e_width, True))
- self.N127 = Const(-(e_max-1), (e_width, True))
- self.N126 = Const(-(e_max-2), (e_width, True))
-
- self.is_nan = Signal(reset_less=True)
- self.is_zero = Signal(reset_less=True)
- self.is_inf = Signal(reset_less=True)
- self.is_overflowed = Signal(reset_less=True)
- self.is_denormalised = Signal(reset_less=True)
- self.exp_128 = Signal(reset_less=True)
- self.exp_sub_n126 = Signal((e_width, True), reset_less=True)
- self.exp_lt_n126 = Signal(reset_less=True)
- self.exp_gt_n126 = Signal(reset_less=True)
- self.exp_gt127 = Signal(reset_less=True)
- self.exp_n127 = Signal(reset_less=True)
- self.exp_n126 = Signal(reset_less=True)
- self.m_zero = Signal(reset_less=True)
- self.m_msbzero = Signal(reset_less=True)
-
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.is_nan.eq(self._is_nan())
- m.d.comb += self.is_zero.eq(self._is_zero())
- m.d.comb += self.is_inf.eq(self._is_inf())
- m.d.comb += self.is_overflowed.eq(self._is_overflowed())
- m.d.comb += self.is_denormalised.eq(self._is_denormalised())
- m.d.comb += self.exp_128.eq(self.e == self.P128)
- m.d.comb += self.exp_sub_n126.eq(self.e - self.N126)
- m.d.comb += self.exp_gt_n126.eq(self.exp_sub_n126 > 0)
- m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0)
- m.d.comb += self.exp_gt127.eq(self.e > self.P127)
- m.d.comb += self.exp_n127.eq(self.e == self.N127)
- m.d.comb += self.exp_n126.eq(self.e == self.N126)
- m.d.comb += self.m_zero.eq(self.m == self.mzero)
- m.d.comb += self.m_msbzero.eq(self.m[self.e_start] == 0)
-
- return m
-
- def _is_nan(self):
- return (self.exp_128) & (~self.m_zero)
-
- def _is_inf(self):
- return (self.exp_128) & (self.m_zero)
-
- def _is_zero(self):
- return (self.exp_n127) & (self.m_zero)
-
- def _is_overflowed(self):
- return self.exp_gt127
-
- def _is_denormalised(self):
- return (self.exp_n126) & (self.m_msbzero)
-
- def __iter__(self):
- yield self.s
- yield self.e
- yield self.m
-
- def eq(self, inp):
- return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)]
-
-
-class FPNumOut(FPNumBase):
- """ Floating-point Number Class
-
- Contains signals for an incoming copy of the value, decoded into
- sign / exponent / mantissa.
- Also contains encoding functions, creation and recognition of
- zero, NaN and inf (all signed)
-
- Four extra bits are included in the mantissa: the top bit
- (m[-1]) is effectively a carry-overflow. The other three are
- guard (m[2]), round (m[1]), and sticky (m[0])
- """
- def __init__(self, width, m_extra=True):
- FPNumBase.__init__(self, width, m_extra)
-
- def elaborate(self, platform):
- m = FPNumBase.elaborate(self, platform)
-
- return m
-
- def create(self, s, e, m):
- """ creates a value from sign / exponent / mantissa
-
- bias is added here, to the exponent
- """
- return [
- self.v[-1].eq(s), # sign
- self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias)
- self.v[0:self.e_start].eq(m) # mantissa
- ]
-
- def nan(self, s):
- return self.create(s, self.P128, 1<<(self.e_start-1))
-
- def inf(self, s):
- return self.create(s, self.P128, 0)
-
- def zero(self, s):
- return self.create(s, self.N127, 0)
-
- def create2(self, s, e, m):
- """ creates a value from sign / exponent / mantissa
-
- bias is added here, to the exponent
- """
- e = e + self.P127 # exp (add on bias)
- return Cat(m[0:self.e_start],
- e[0:self.e_end-self.e_start],
- s)
-
- def nan2(self, s):
- return self.create2(s, self.P128, self.msb1)
-
- def inf2(self, s):
- return self.create2(s, self.P128, self.mzero)
-
- def zero2(self, s):
- return self.create2(s, self.N127, self.mzero)
-
-
-class MultiShiftRMerge(Elaboratable):
- """ shifts down (right) and merges lower bits into m[0].
- m[0] is the "sticky" bit, basically
- """
- def __init__(self, width, s_max=None):
- if s_max is None:
- s_max = int(log(width) / log(2))
- self.smax = s_max
- self.m = Signal(width, reset_less=True)
- self.inp = Signal(width, reset_less=True)
- self.diff = Signal(s_max, reset_less=True)
- self.width = width
-
- def elaborate(self, platform):
- m = Module()
-
- rs = Signal(self.width, reset_less=True)
- m_mask = Signal(self.width, reset_less=True)
- smask = Signal(self.width, reset_less=True)
- stickybit = Signal(reset_less=True)
- maxslen = Signal(self.smax, reset_less=True)
- maxsleni = Signal(self.smax, reset_less=True)
-
- sm = MultiShift(self.width-1)
- m0s = Const(0, self.width-1)
- mw = Const(self.width-1, len(self.diff))
- m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)),
- maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)),
- ]
-
- m.d.comb += [
- # shift mantissa by maxslen, mask by inverse
- rs.eq(sm.rshift(self.inp[1:], maxslen)),
- m_mask.eq(sm.rshift(~m0s, maxsleni)),
- smask.eq(self.inp[1:] & m_mask),
- # sticky bit combines all mask (and mantissa low bit)
- stickybit.eq(smask.bool() | self.inp[0]),
- # mantissa result contains m[0] already.
- self.m.eq(Cat(stickybit, rs))
- ]
- return m
-
-
-class FPNumShift(FPNumBase, Elaboratable):
- """ Floating-point Number Class for shifting
- """
- def __init__(self, mainm, op, inv, width, m_extra=True):
- FPNumBase.__init__(self, width, m_extra)
- self.latch_in = Signal()
- self.mainm = mainm
- self.inv = inv
- self.op = op
-
- def elaborate(self, platform):
- m = FPNumBase.elaborate(self, platform)
-
- m.d.comb += self.s.eq(op.s)
- m.d.comb += self.e.eq(op.e)
- m.d.comb += self.m.eq(op.m)
-
- with self.mainm.State("align"):
- with m.If(self.e < self.inv.e):
- m.d.sync += self.shift_down()
-
- return m
-
- def shift_down(self, inp):
- """ shifts a mantissa down by one. exponent is increased to compensate
-
- accuracy is lost as a result in the mantissa however there are 3
- guard bits (the latter of which is the "sticky" bit)
- """
- return [self.e.eq(inp.e + 1),
- self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
- ]
-
- def shift_down_multi(self, diff):
- """ shifts a mantissa down. exponent is increased to compensate
-
- accuracy is lost as a result in the mantissa however there are 3
- guard bits (the latter of which is the "sticky" bit)
-
- this code works by variable-shifting the mantissa by up to
- its maximum bit-length: no point doing more (it'll still be
- zero).
-
- the sticky bit is computed by shifting a batch of 1s by
- the same amount, which will introduce zeros. it's then
- inverted and used as a mask to get the LSBs of the mantissa.
- those are then |'d into the sticky bit.
- """
- sm = MultiShift(self.width)
- mw = Const(self.m_width-1, len(diff))
- maxslen = Mux(diff > mw, mw, diff)
- rs = sm.rshift(self.m[1:], maxslen)
- maxsleni = mw - maxslen
- m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
-
- stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0]
- return [self.e.eq(self.e + diff),
- self.m.eq(Cat(stickybits, rs))
- ]
-
- def shift_up_multi(self, diff):
- """ shifts a mantissa up. exponent is decreased to compensate
- """
- sm = MultiShift(self.width)
- mw = Const(self.m_width, len(diff))
- maxslen = Mux(diff > mw, mw, diff)
-
- return [self.e.eq(self.e - diff),
- self.m.eq(sm.lshift(self.m, maxslen))
- ]
-
-
-class FPNumDecode(FPNumBase):
- """ Floating-point Number Class
-
- Contains signals for an incoming copy of the value, decoded into
- sign / exponent / mantissa.
- Also contains encoding functions, creation and recognition of
- zero, NaN and inf (all signed)
-
- Four extra bits are included in the mantissa: the top bit
- (m[-1]) is effectively a carry-overflow. The other three are
- guard (m[2]), round (m[1]), and sticky (m[0])
- """
- def __init__(self, op, width, m_extra=True):
- FPNumBase.__init__(self, width, m_extra)
- self.op = op
-
- def elaborate(self, platform):
- m = FPNumBase.elaborate(self, platform)
-
- m.d.comb += self.decode(self.v)
-
- return m
-
- def decode(self, v):
- """ decodes a latched value into sign / exponent / mantissa
-
- bias is subtracted here, from the exponent. exponent
- is extended to 10 bits so that subtract 127 is done on
- a 10-bit number
- """
- args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
- #print ("decode", self.e_end)
- return [self.m.eq(Cat(*args)), # mantissa
- self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
- self.s.eq(v[-1]), # sign
- ]
-
-class FPNumIn(FPNumBase):
- """ Floating-point Number Class
-
- Contains signals for an incoming copy of the value, decoded into
- sign / exponent / mantissa.
- Also contains encoding functions, creation and recognition of
- zero, NaN and inf (all signed)
-
- Four extra bits are included in the mantissa: the top bit
- (m[-1]) is effectively a carry-overflow. The other three are
- guard (m[2]), round (m[1]), and sticky (m[0])
- """
- def __init__(self, op, width, m_extra=True):
- FPNumBase.__init__(self, width, m_extra)
- self.latch_in = Signal()
- self.op = op
-
- def decode2(self, m):
- """ decodes a latched value into sign / exponent / mantissa
-
- bias is subtracted here, from the exponent. exponent
- is extended to 10 bits so that subtract 127 is done on
- a 10-bit number
- """
- v = self.v
- args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
- #print ("decode", self.e_end)
- res = ObjectProxy(m, pipemode=False)
- res.m = Cat(*args) # mantissa
- res.e = v[self.e_start:self.e_end] - self.P127 # exp
- res.s = v[-1] # sign
- return res
-
- def decode(self, v):
- """ decodes a latched value into sign / exponent / mantissa
-
- bias is subtracted here, from the exponent. exponent
- is extended to 10 bits so that subtract 127 is done on
- a 10-bit number
- """
- args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
- #print ("decode", self.e_end)
- return [self.m.eq(Cat(*args)), # mantissa
- self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
- self.s.eq(v[-1]), # sign
- ]
-
- def shift_down(self, inp):
- """ shifts a mantissa down by one. exponent is increased to compensate
-
- accuracy is lost as a result in the mantissa however there are 3
- guard bits (the latter of which is the "sticky" bit)
- """
- return [self.e.eq(inp.e + 1),
- self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
- ]
-
- def shift_down_multi(self, diff, inp=None):
- """ shifts a mantissa down. exponent is increased to compensate
-
- accuracy is lost as a result in the mantissa however there are 3
- guard bits (the latter of which is the "sticky" bit)
-
- this code works by variable-shifting the mantissa by up to
- its maximum bit-length: no point doing more (it'll still be
- zero).
-
- the sticky bit is computed by shifting a batch of 1s by
- the same amount, which will introduce zeros. it's then
- inverted and used as a mask to get the LSBs of the mantissa.
- those are then |'d into the sticky bit.
- """
- if inp is None:
- inp = self
- sm = MultiShift(self.width)
- mw = Const(self.m_width-1, len(diff))
- maxslen = Mux(diff > mw, mw, diff)
- rs = sm.rshift(inp.m[1:], maxslen)
- maxsleni = mw - maxslen
- m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
-
- #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0]
- stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0]
- return [self.e.eq(inp.e + diff),
- self.m.eq(Cat(stickybit, rs))
- ]
-
- def shift_up_multi(self, diff):
- """ shifts a mantissa up. exponent is decreased to compensate
- """
- sm = MultiShift(self.width)
- mw = Const(self.m_width, len(diff))
- maxslen = Mux(diff > mw, mw, diff)
-
- return [self.e.eq(self.e - diff),
- self.m.eq(sm.lshift(self.m, maxslen))
- ]
-
-class Trigger(Elaboratable):
- def __init__(self):
-
- self.stb = Signal(reset=0)
- self.ack = Signal()
- self.trigger = Signal(reset_less=True)
-
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.trigger.eq(self.stb & self.ack)
- return m
-
- def eq(self, inp):
- return [self.stb.eq(inp.stb),
- self.ack.eq(inp.ack)
- ]
-
- def ports(self):
- return [self.stb, self.ack]
-
-
-class FPOpIn(PrevControl):
- def __init__(self, width):
- PrevControl.__init__(self)
- self.width = width
-
- @property
- def v(self):
- return self.data_i
-
- def chain_inv(self, in_op, extra=None):
- stb = in_op.stb
- if extra is not None:
- stb = stb & extra
- return [self.v.eq(in_op.v), # receive value
- self.stb.eq(stb), # receive STB
- in_op.ack.eq(~self.ack), # send ACK
- ]
-
- def chain_from(self, in_op, extra=None):
- stb = in_op.stb
- if extra is not None:
- stb = stb & extra
- return [self.v.eq(in_op.v), # receive value
- self.stb.eq(stb), # receive STB
- in_op.ack.eq(self.ack), # send ACK
- ]
-
-
-class FPOpOut(NextControl):
- def __init__(self, width):
- NextControl.__init__(self)
- self.width = width
-
- @property
- def v(self):
- return self.data_o
-
- def chain_inv(self, in_op, extra=None):
- stb = in_op.stb
- if extra is not None:
- stb = stb & extra
- return [self.v.eq(in_op.v), # receive value
- self.stb.eq(stb), # receive STB
- in_op.ack.eq(~self.ack), # send ACK
- ]
-
- def chain_from(self, in_op, extra=None):
- stb = in_op.stb
- if extra is not None:
- stb = stb & extra
- return [self.v.eq(in_op.v), # receive value
- self.stb.eq(stb), # receive STB
- in_op.ack.eq(self.ack), # send ACK
- ]
-
-
class Overflow: #(Elaboratable):
    """ holds the guard/round/sticky bits (plus a copy of the mantissa
        LSB, m0) used for rounding, and the combinatorial "round up"
        decision roundz.
    """
    def __init__(self):
        self.guard = Signal(reset_less=True)      # tot[2]
        self.round_bit = Signal(reset_less=True)  # tot[1]
        self.sticky = Signal(reset_less=True)     # tot[0]
        self.m0 = Signal(reset_less=True)         # mantissa zero bit
        self.roundz = Signal(reset_less=True)     # round-up decision

    def __iter__(self):
        for sig in (self.guard, self.round_bit, self.sticky, self.m0):
            yield sig

    def eq(self, inp):
        """ copies guard/round/sticky/m0 from another Overflow """
        return [self.guard.eq(inp.guard),
                self.round_bit.eq(inp.round_bit),
                self.sticky.eq(inp.sticky),
                self.m0.eq(inp.m0)]

    def elaborate(self, platform):
        """ round up when guard is set and any of round_bit / sticky /
            m0 is also set
        """
        m = Module()
        m.d.comb += self.roundz.eq((self.round_bit | self.sticky | self.m0)
                                   & self.guard)
        return m
-
-
class FPBase:
    """ IEEE754 Floating Point Base Class

    contains common functions for FP manipulation, such as
    extracting and packing operands, normalisation, denormalisation,
    rounding etc.  the methods are FSM "helpers": they add comb/sync
    statements to a Module and set m.next to drive state transitions.
    """

    def get_op(self, m, op, v, next_state):
        """ this function moves to the next state and copies the operand
            when both stb and ack are 1.
            acknowledgement is sent by setting ack to ZERO.
        """
        res = v.decode2(m)
        ack = Signal()
        with m.If((op.ready_o) & (op.valid_i_test)):
            m.next = next_state
            # op is latched in from FPNumIn class on same ack/stb
            m.d.comb += ack.eq(0)
        with m.Else():
            m.d.comb += ack.eq(1)
        return [res, ack]

    def denormalise(self, m, a):
        """ denormalises a number. this is probably the wrong name for
            this function. for normalised numbers (exponent != minimum)
            one *extra* bit (the implicit 1) is added *back in*.
            for denormalised numbers, the mantissa is left alone
            and the exponent increased by 1.

            both cases *effectively multiply the number stored by 2*,
            which has to be taken into account when extracting the result.
        """
        with m.If(a.exp_n127):
            m.d.sync += a.e.eq(a.N126) # limit a exponent
        with m.Else():
            m.d.sync += a.m[-1].eq(1) # set top mantissa bit

    def op_normalise(self, m, op, next_state):
        """ operand normalisation
            NOTE: just like "align", this one keeps going round every clock
            until the result's exponent is within acceptable "range"
        """
        with m.If((op.m[-1] == 0)): # check last bit of mantissa
            m.d.sync +=[
                op.e.eq(op.e - 1),  # DECREASE exponent
                op.m.eq(op.m << 1), # shift mantissa UP
            ]
        with m.Else():
            m.next = next_state

    def normalise_1(self, m, z, of, next_state):
        """ first stage normalisation

            NOTE: just like "align", this one keeps going round every clock
            until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
            the extra mantissa bits coming from tot[0..2]
        """
        with m.If((z.m[-1] == 0) & (z.e > z.N126)):
            m.d.sync += [
                z.e.eq(z.e - 1),  # DECREASE exponent
                z.m.eq(z.m << 1), # shift mantissa UP
                z.m[0].eq(of.guard),       # steal guard bit (was tot[2])
                of.guard.eq(of.round_bit), # steal round_bit (was tot[1])
                of.round_bit.eq(0),        # reset round bit
                of.m0.eq(of.guard),
            ]
        with m.Else():
            m.next = next_state

    def normalise_2(self, m, z, of, next_state):
        """ second stage normalisation

            NOTE: just like "align", this one keeps going round every clock
            until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
            the extra mantissa bits coming from tot[0..2]
        """
        with m.If(z.e < z.N126):
            m.d.sync +=[
                z.e.eq(z.e + 1),  # INCREASE exponent
                z.m.eq(z.m >> 1), # shift mantissa DOWN
                of.guard.eq(z.m[0]),
                of.m0.eq(z.m[1]),
                of.round_bit.eq(of.guard),
                of.sticky.eq(of.sticky | of.round_bit)
            ]
        with m.Else():
            m.next = next_state

    def roundz(self, m, z, roundz):
        """ performs rounding on the output. TODO: different kinds of rounding
        """
        with m.If(roundz):
            m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up
            with m.If(z.m == z.m1s): # all 1s
                m.d.sync += z.e.eq(z.e + 1) # exponent rounds up

    def corrections(self, m, z, next_state):
        """ denormalisation and sign-bug corrections
        """
        m.next = next_state
        # denormalised, correct exponent to zero
        with m.If(z.is_denormalised):
            m.d.sync += z.e.eq(z.N127)

    def pack(self, m, z, next_state):
        """ packs the result into the output (detects overflow->Inf)
        """
        m.next = next_state
        # if overflow occurs, return inf
        with m.If(z.is_overflowed):
            m.d.sync += z.inf(z.s)
        with m.Else():
            m.d.sync += z.create(z.s, z.e, z.m)

    def put_z(self, m, z, out_z, next_state):
        """ put_z: stores the result in the output. raises stb and waits
            for ack to be set to 1 before moving to the next state.
            resets stb back to zero when that occurs, as acknowledgement.
        """
        m.d.sync += [
          out_z.v.eq(z.v)
        ]
        with m.If(out_z.valid_o & out_z.ready_i_test):
            m.d.sync += out_z.valid_o.eq(0)
            m.next = next_state
        with m.Else():
            m.d.sync += out_z.valid_o.eq(1)
-
-
class FPState(FPBase):
    """ base class for a named FSM state.  inputs/outputs are recorded
        and also mirrored as attributes so state actions can reference
        them directly by name.
    """
    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict and exposes each entry as an attribute """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ stores the outputs dict and exposes each entry as an attribute """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
-
-
class FPID:
    """ optional in/out multiplexer-id ("mid") tracking.

    if id_wid is truthy, in_mid/out_mid Signals are created; otherwise
    both are None and idsync is a no-op.
    """
    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ propagates in_mid to out_mid on the sync domain.

        bugfix: the guard previously tested "self.id_wid is not None",
        which diverges from __init__'s truthiness test -- for
        id_wid == 0 the mid Signals are None yet the old guard still
        tried to .eq() them.  guard on the Signals actually existing.
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Elaboratable
-from nmigen.cli import main, verilog
-from fpbase import FPState
-from fpcommon.roundz import FPRoundData
-
-
class FPCorrectionsMod(Elaboratable):
    """ combinatorial post-round correction stage.

    if the rounded result is denormalised its exponent is clamped to
    N127; everything else passes straight through.  the out_do_z flag
    bypasses the correction entirely.

    NOTE(review): the output attribute is named out_z here whereas
    sibling stage modules use .o -- confirm downstream users expect
    this name.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()        # input record (FPRoundData)
        self.out_z = self.ospec()    # output record (FPRoundData)

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            # denormalised result: clamp the exponent
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
-
-
class FPCorrections(FPState):
    """ FSM wrapper around FPCorrectionsMod: registers the corrected
        result into out_z and advances to the "pack" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # bugfix: id_wid was not passed through to the combinatorial
        # module (FPCorrectionsMod.__init__ requires it)
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # bugfix: the module's output record is named out_z, not o.
        # (FPRoundData.eq already copies mid; the explicit mid copy is
        # kept for parity with sibling states)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal
-from nmigen.cli import main, verilog
-from math import log
-
-from fpbase import FPNumIn, FPNumOut, FPNumBase
-from fpbase import FPState
-
-
class FPSCData:
    """ record passed from special-cases into de-normalisation: the two
        operands a/b, the (possibly early) result z, and the out_do_z
        short-circuit flag with its pre-packed value oz, plus mux id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        for nested in (self.a, self.b, self.z):
            yield from nested
        yield self.oz
        yield self.out_do_z
        yield self.mid

    def eq(self, i):
        """ field-by-field copy from another FPSCData """
        pairs = ((self.z, i.z), (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz), (self.a, i.a), (self.b, i.b),
                 (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
-
-
class FPAddDeNormMod(FPState):
    """ combinatorial de-normalisation of both operands.

    for a denormal input (exp_n127) the exponent is clamped to N126;
    otherwise the implicit leading 1 is restored into the top mantissa
    bit.  skipped entirely when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()    # input record (FPSCData)
        self.o = self.ospec()    # output record (FPSCData)

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # XXX hmmm, don't like repeating identical code
            m.d.comb += self.o.a.eq(self.i.a)
            with m.If(self.i.a.exp_n127):
                m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
            with m.Else():
                m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit

            m.d.comb += self.o.b.eq(self.i.b)
            with m.If(self.i.b.exp_n127):
                m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
            with m.Else():
                m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit

        # pass-through of the early-out result and mux id
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
-
-
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the denormalised
        operands and advances to the "align" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # bugfix: id_wid was not passed through to the combinatorial
        # module (FPAddDeNormMod.__init__ requires it)
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # bugfix: the module's output record is self.mod.o (an FPSCData
        # with .a/.b); out_a/out_b attributes do not exist on the module
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Cat, Mux, Array, Const, Elaboratable
-from nmigen.lib.coding import PriorityEncoder
-from nmigen.cli import main, verilog
-from math import log
-
-from fpbase import FPNumIn, FPNumOut, FPOpIn, Overflow, FPBase, FPNumBase
-from fpbase import MultiShiftRMerge, Trigger
-from singlepipe import (ControlBase, StageChain, SimpleHandshake,
- PassThroughStage, PrevControl)
-from multipipe import CombMuxOutPipe
-from multipipe import PriorityCombMuxInPipe
-
-from fpbase import FPState
-import nmoperator
-
-
class FPGetOpMod(Elaboratable):
    """ combinatorial operand capture: out_decode pulses while the input
        handshake fires, and the operand value is forwarded to out_op.
    """
    def __init__(self, width):
        self.in_op = FPOpIn(width)
        self.in_op.data_i = Signal(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        handshake = (self.in_op.ready_o) & (self.in_op.valid_i_test)
        m.d.comb += self.out_decode.eq(handshake)
        with m.If(self.out_decode):
            # forward the operand only while the handshake is active
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
-
-
class FPGetOp(FPState):
    """ gets operand: FSM state wrapping FPGetOpMod.  latches the
        operand on the handshake, drops ready_o as acknowledgement and
        moves to out_state; otherwise keeps ready_o raised.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        # submodule is registered under this state's name
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += nmoperator.eq(self.mod.in_op, in_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.in_op.ready_o.eq(0),      # acknowledge: drop ready
                self.out_op.eq(self.mod.out_op) # latch the operand
            ]
        with m.Else():
            m.d.sync += self.in_op.ready_o.eq(1)
-
-
class FPNumBase2Ops:
    """ a pair of decoded FPNumBase operands plus the mux id ("mid") """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ copies a, b and mid from another 2-operand record """
        return [dst.eq(src) for dst, src in
                ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))]

    def ports(self):
        return [self.a, self.b, self.mid]
-
-
class FPADDBaseData:
    """ raw input record for the FP adder: two width-bit operands and
        the mux id ("mid")
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ copies a, b and mid from another record """
        return [dst.eq(src) for dst, src in
                ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))]

    def ports(self):
        return [self.a, self.b, self.mid]
-
-
class FPGet2OpMod(PrevControl):
    """ PrevControl-based capture of both adder operands: forwards the
        data_i record to o while the ready/valid trigger fires.
    """
    def __init__(self, width, id_wid):
        PrevControl.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.data_i = self.ispec()   # handshake data record
        self.i = self.data_i         # alias used by setup code
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        # extends PrevControl's elaborate: snapshot data_i into o while
        # the handshake trigger is active
        m = PrevControl.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += [
                self.o.eq(self.data_i),
            ]
        return m
-
-
class FPGet2Op(FPState):
    """ gets operands: FSM state wrapping FPGet2OpMod.  latches both
        operands on the handshake, drops ready_o as acknowledgement and
        moves to out_state; otherwise keeps ready_o raised.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def trigger_setup(self, m, in_stb, in_ack):
        """ links stb/ack
        """
        m.d.comb += self.mod.valid_i.eq(in_stb)
        m.d.comb += in_ack.eq(self.mod.ready_o)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.get_ops = self.mod
        m.d.comb += self.mod.i.eq(i)
        m.d.comb += self.out_ack.eq(self.mod.ready_o)
        m.d.comb += self.out_decode.eq(self.mod.trigger)

    def process(self, i):
        return self.o

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.mod.ready_o.eq(0),  # acknowledge: drop ready
                self.o.eq(self.mod.o),   # latch both operands
            ]
        with m.Else():
            m.d.sync += self.mod.ready_o.eq(1)
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-#from nmigen.cli import main, verilog
-
-from singlepipe import StageChain, SimpleHandshake
-
-from fpbase import FPState, FPID
-from fpcommon.postcalc import FPAddStage1Data
-from fpcommon.postnormalise import FPNorm1ModSingle
-from fpcommon.roundz import FPRoundMod
-from fpcommon.corrections import FPCorrectionsMod
-from fpcommon.pack import FPPackData, FPPackMod
-
-
class FPNormToPack(FPState, SimpleHandshake):
    """ chains normalisation -> rounding -> corrections -> packing into
        a single combinatorial StageChain, wrapped as a SimpleHandshake
        pipeline stage.  also usable as FSM state "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        SimpleHandshake.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        stages = [nmod, rmod, cmod, pmod]
        chain = StageChain(stages)
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        # expose the last stage's output as this stage's output
        self.o = pmod.o

    def process(self, i):
        return self.o

    def action(self, m):
        # FSM use: register the chain's output and move on
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumOut
-from fpbase import FPState
-from fpcommon.roundz import FPRoundData
-from singlepipe import Object
-
-
class FPPackData(Object):
    """ final packed result: the IEEE754 bit-pattern z plus the mux id """

    def __init__(self, width, id_wid):
        Object.__init__(self)
        self.z = Signal(width, reset_less=True)     # packed result
        self.mid = Signal(id_wid, reset_less=True)  # mux id
-
-
class FPPackMod(Elaboratable):
    """ combinatorial packing stage: assembles sign/exponent/mantissa
        into the final bit-pattern, substituting Inf on overflow, or the
        pre-packed early-out value oz when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()    # input record (FPRoundData)
        self.o = self.ospec()    # output record (FPPackData)

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        # scratch FPNumOut used to assemble the bit-pattern
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # overflowed: substitute infinity (keeping the sign)
            with m.If(self.i.z.is_overflowed):
                m.d.comb += z.inf(self.i.z.s)
            with m.Else():
                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        with m.Else():
            # early-out: use the pre-packed value
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
-
-
class FPPack(FPState):
    """ FSM wrapper around FPPackMod: registers the packed result and
        advances to the "pack_put_z" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # bugfix: id_wid was not passed through to the combinatorial
        # module (FPPackMod.__init__ requires it)
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # bugfix: the module's output record is self.mod.o (an
        # FPPackData with fields z and mid) -- neither mod.out_z nor a
        # .v field exists on it
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Signal
-from fpbase import Overflow, FPNumBase
-
class FPAddStage1Data:
    """ post-add result record: intermediate z, the overflow/rounding
        bits, the early-out flag/value (out_do_z/oz) and the mux id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        yield from self.z
        yield self.out_do_z
        yield self.oz
        yield from self.of
        yield self.mid

    def eq(self, i):
        """ field-by-field copy from another stage-1 record """
        pairs = ((self.z, i.z), (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz), (self.of, i.of), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Cat, Mux, Elaboratable
-from nmigen.lib.coding import PriorityEncoder
-from nmigen.cli import main, verilog
-from math import log
-
-from fpbase import Overflow, FPNumBase
-from fpbase import MultiShiftRMerge
-from fpbase import FPState
-from .postcalc import FPAddStage1Data
-
-
class FPNorm1Data:
    """ post-normalisation record: normalised z, the round-up decision
        (roundz), the early-out flag/value and the mux id.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ field-by-field copy from another record """
        pairs = ((self.z, i.z), (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz), (self.roundz, i.roundz),
                 (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
-
-
class FPNorm1ModSingle(Elaboratable):
    """ single-cycle normalisation: shifts the mantissa up (clz-driven,
        via a PriorityEncoder) when the MSB is zero, or down (via
        MultiShiftRMerge) when the exponent is below the minimum,
        updating guard/round/sticky accordingly.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()    # input record (FPAddStage1Data)
        self.o = self.ospec()    # output record (FPNorm1Data)

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]), # inverted
                    clz.eq(limclz), # count zeros from MSB down
                    temp_s.eq(temp_m << clz), # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # bugfix: these four read from the *shifted* result
                    # msr.m; they previously read temp_s, the decrease-
                    # branch signal, which is never driven in this branch
                    of.m0.eq(msr.m[3]),          # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too (leave sticky)
                    of.guard.eq(msr.m[2]),       # guard
                    of.round_bit.eq(msr.m[1]),   # round
                    of.sticky.eq(msr.m[0]),      # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
-
-
class FPNorm1ModMulti:
    """ multi-cycle normalisation: shifts the mantissa one bit per clock
        (up when MSB is zero, down when the exponent is too small),
        asserting out_norm for as long as another pass is needed.
        in_select chooses between fresh input and the looped-back temp.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # bugfix: out_norm is driven in elaborate() but was never
        # created, raising AttributeError on elaboration
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0), # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
-
-
class FPNorm1Single(FPState):
    """ FSM wrapper around the single-cycle normaliser: sets up the
        module and advances to the "round" state.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # bugfix: id_wid was not passed through to the combinatorial
        # module (FPNorm1ModSingle.__init__ requires it)
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
-
-
class FPNorm1Multi(FPState):
    """ FSM wrapper around the multi-cycle normaliser: loops in the
        "normalise_1" state (latching temp z/of each clock) until
        out_norm deasserts, then advances to "round".

    NOTE(review): FPNorm1ModMulti (as visible here) defines no setup()
    method and no out_of/out_norm wiring matching the call below --
    confirm the intended module interface before relying on this class.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed the loop-back registers for the next pass
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
from math import log

from nmigen import Module, Signal, Cat
from nmigen.lib.coding import PriorityEncoder
from nmigen.cli import main, verilog

from fpbase import Overflow, FPNumBase
from fpbase import MultiShiftRMerge

from fpbase import FPState
-
-
class FPNormaliseModSingle:
    """ single-cycle mantissa normalisation (decrease-exponent only):
        counts leading zeros with a PriorityEncoder and shifts the
        mantissa up, decreasing the exponent to match.

    NOTE(review): this class looks like an abandoned copy of
    FPNorm1ModSingle -- setup() writes self.i (undefined; presumably
    self.in_z was meant), and elaborate() reads self.in_of and drives
    self.out_of, neither of which is created in __init__.  confirm
    intended attributes before use.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()     # input FPNumBase
        self.out_z = self.ospec()    # output FPNumBase

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # NOTE(review): self.i does not exist -- presumably self.in_z
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        # NOTE(review): self.in_of / self.out_of are never created
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]), # inverted
                clz.eq(pe.o), # count zeros from MSB down
                temp_s.eq(temp_m << clz), # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz), # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]), # exclude bits 0&1
            ]

        return m
-
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Signal
-from nmigen.cli import main, verilog
-from fpbase import FPState
-
-
class FPPutZ(FPState):
    """ FSM state that presents result in_z on out_z with a valid/ready
        handshake, then moves to to_state (default "get_ops").
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        # propagate the mux id alongside the result (when tracked)
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z)
        ]
        with m.If(self.out_z.z.valid_o & self.out_z.z.ready_i_test):
            # handshake completed: drop valid, move on
            m.d.sync += self.out_z.z.valid_o.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.valid_o.eq(1)
-
-
class FPPutZIdx(FPState):
    """ FSM state that presents result in_z on one of several outputs,
        selected by in_mid, with a valid/ready handshake; then moves to
        to_state (default "get_ops").
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # handshake signals of the mid-selected output channel
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].valid_o),
                     outz_ack.eq(self.out_zs[self.in_mid].ready_i_test),
                    ]
        m.d.sync += [
          self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # handshake completed: drop valid, move on
            m.d.sync += self.out_zs[self.in_mid].valid_o.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].valid_o.eq(1)
-
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumBase
-from fpbase import FPState
-from fpcommon.postnormalise import FPNorm1Data
-
-
class FPRoundData:
    """ post-rounding record: the (rounded) z, the early-out flag/value
        (out_do_z/oz) and the mux id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ field-by-field copy from another record """
        pairs = ((self.z, i.z), (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
-
-
class FPRoundMod(Elaboratable):
    """ combinatorial rounding stage: increments the mantissa when the
        roundz decision is set, bumping the exponent when the mantissa
        was all-1s.  skipped when out_do_z is set.

    NOTE(review): the output attribute is named out_z here whereas
    sibling stage modules use .o -- confirm downstream users.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()        # input record (FPNorm1Data)
        self.out_z = self.ospec()    # output record (FPRoundData)

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
                with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up

        return m
-
-
class FPRound(FPState):
    """ FSM wrapper around FPRoundMod: registers the rounded result and
        advances to the "corrections" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # bugfix: id_wid was not passed through to the combinatorial
        # module (FPRoundMod.__init__ requires it)
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # bugfix: the previous self.idsync(m) call raised
        # AttributeError -- idsync is an FPID method, and FPState does
        # not inherit it.  mid propagation is covered by the out_z.eq()
        # copy below (FPRoundData.eq includes mid).
        # bugfix: the module's output record is named out_z, not o.
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
+++ /dev/null
-from sfpy import Float32
-
-
# XXX DO NOT USE, fails on num=65536. wark-wark...
def sqrtsimple(num):
    """ bit-by-bit (digit-recurrence, base-4) integer square root.
        see the warning above: reported broken for some inputs.
    """
    res, bit = 0, 1

    # scale "bit" up by powers of 4 while it is below num
    while bit < num:
        bit <<= 2

    # extract one result bit per power-of-4 step
    while bit:
        if num < res + bit:
            res >>= 1
        else:
            num -= res + bit
            res = (res >> 1) + bit
        bit >>= 2

    return res
-
-
def sqrt(num):
    """Non-restoring integer square root.

    Processes two bits of *num* per iteration (65 iterations, so inputs
    up to 130 bits are handled).  Returns (root, remainder) where
    root*root + remainder == num.
    """
    root = 0       # Q: accumulated root bits
    rem = 0        # R: running (possibly negative) remainder

    for i in range(64, -1, -1):  # negative ranges are weird...
        # shift in the next two bits of the radicand
        rem = (rem << 2) | ((num >> (i + i)) & 3)

        # non-restoring: subtract when non-negative, add back otherwise
        if rem >= 0:
            rem -= (root << 2) | 1   # -Q01
        else:
            rem += (root << 2) | 3   # +Q11

        root <<= 1
        if rem >= 0:
            root |= 1                # accept this root bit

    # final correction: fold a negative remainder back to non-negative
    if rem < 0:
        rem += (root << 1) | 1

    return root, rem
-
-
-# grabbed these from unit_test_single (convenience, this is just experimenting)
-
def get_mantissa(x):
    """Return the low 23 mantissa bits of an FP32 bit-pattern."""
    return x & 0x7fffff
-
def get_exponent(x):
    """Return the unbiased FP32 exponent (bias of 127 removed)."""
    biased = (x & 0x7f800000) >> 23
    return biased - 127
-
def set_exponent(x, e):
    """Return *x* with its exponent field replaced by the biased *e*."""
    cleared = x & ~0x7f800000
    return cleared | ((e + 127) << 23)
-
def get_sign(x):
    """Return the FP32 sign bit (0 or 1)."""
    return (x & 0x80000000) and 1 or 0
-
# convert s/e/m to FP32 (comment was swapped with decode_fp32's)
def create_fp32(s, e, m):
    """ receive sign, exponent, mantissa, return FP32

    BUG FIX: set_exponent() takes (x, e) but the original called it with
    a single argument, dropping the exponent entirely (TypeError at
    runtime).  Pass e through.
    """
    return set_exponent((s << 31) | get_mantissa(m), e)
-
# convert FP32 to s/e/m
def decode_fp32(x):
    """ receive FP32, return sign, exponent, mantissa """
    sign = get_sign(x)
    exponent = get_exponent(x)
    mantissa = get_mantissa(x)
    return sign, exponent, mantissa
-
-
# main function, takes mantissa and exponent as separate arguments
# returns a tuple: sqrt'd mantissa, remainder, sqrt'd exponent
def main(mantissa, exponent):
    """Integer-domain sqrt of mantissa * 2**exponent.

    An odd exponent is first made even (double the mantissa, decrement
    the exponent) so that halving the exponent is exact.
    """
    if exponent & 1:
        # shift mantissa up, subtract 1 from exp to compensate
        mantissa <<= 1
        exponent -= 1
    root, remainder = sqrt(mantissa)
    return root, remainder, exponent >> 1
-
-
# normalization function: optional round-up based on the guard bits
def normalise(s, m, e, lowbits):
    """Round the sqrt result up when the two guard bits indicate >= half."""
    if lowbits >= 2:
        m += 1
        # NOTE(review): get_mantissa() masks to 23 bits (max 0x7fffff),
        # so comparing against 0xffffff can never be true -- this
        # exponent bump looks unreachable.  Preserved as-is; confirm intent.
        if get_mantissa(m) == ((1 << 24) - 1):
            e += 1
    return s, m, e
-
-
def fsqrt_test(x):
    """Compare this file's integer-sqrt FP32 path against sfpy's sqrt.

    x: an sfpy Float32.  Prints both decompositions side by side for
    manual comparison; nothing is asserted.
    """

    xbits = x.bits
    print ("x", x, type(x))
    sq_test = x.sqrt()  # reference result from sfpy
    print ("sqrt", sq_test)

    print (xbits, type(xbits))
    s, e, m = decode_fp32(xbits)
    print("x decode", s, e, m, hex(m))

    m |= 1<<23 # set top bit (the missing "1" from mantissa)
    m <<= 27   # pre-scale so the integer sqrt retains guard/round bits

    sm, sr, se = main(m, e)
    lowbits = sm & 0x3  # bottom two bits drive the round-up decision
    sm >>= 2
    sm = get_mantissa(sm)
    #sm += 2

    s, sm, se = normalise(s, sm, se, lowbits)

    print("our sqrt", s, se, sm, hex(sm), bin(sm), "lowbits", lowbits,
          "rem", hex(sr))
    if lowbits >= 2:
        print ("probably needs rounding (+1 on mantissa)")

    # decode sfpy's answer the same way for visual diffing
    sq_xbits = sq_test.bits
    s, e, m = decode_fp32(sq_xbits)
    print ("sf32 sqrt", s, e, m, hex(m), bin(m))
    print ()
-
if __name__ == '__main__':

    # quick test up to 1000 of two sqrt functions
    for Q in range(1, int(1e4)):
        print(Q, sqrt(Q), sqrtsimple(Q), int(Q**0.5))
        assert int(Q**0.5) == sqrtsimple(Q), "Q sqrtsimpl fail %d" % Q
        assert int(Q**0.5) == sqrt(Q)[0], "Q sqrt fail %d" % Q

    # quick mantissa/exponent demo
    for e in range(26):
        for m in range(26):
            ms, mr, es = main(m, e)
            print("m:%d e:%d sqrt: m:%d-%d e:%d" % (m, e, ms, mr, es))

    # spot-check the full FP32 sqrt path against sfpy on a spread of
    # values: large, small, exact powers of two, and fractions
    x = Float32(1234.123456789)
    fsqrt_test(x)
    x = Float32(32.1)
    fsqrt_test(x)
    x = Float32(16.0)
    fsqrt_test(x)
    x = Float32(8.0)
    fsqrt_test(x)
    x = Float32(8.5)
    fsqrt_test(x)
    x = Float32(3.14159265358979323)
    fsqrt_test(x)
    x = Float32(12.99392923123123)
    fsqrt_test(x)
    x = Float32(0.123456)
    fsqrt_test(x)
-
-
-
-
-"""
-
-Notes:
-https://pdfs.semanticscholar.org/5060/4e9aff0e37089c4ab9a376c3f35761ffe28b.pdf
-
-//This is the main code of integer sqrt function found here:http://verilogcodes.blogspot.com/2017/11/a-verilog-function-for-finding-square-root.html
-//
-
-module testbench;
-
-reg [15:0] sqr;
-
-//Verilog function to find square root of a 32 bit number.
-//The output is 16 bit.
-function [15:0] sqrt;
- input [31:0] num; //declare input
- //intermediate signals.
- reg [31:0] a;
- reg [15:0] q;
- reg [17:0] left,right,r;
- integer i;
-begin
- //initialize all the variables.
- a = num;
- q = 0;
- i = 0;
- left = 0; //input to adder/sub
- right = 0; //input to adder/sub
- r = 0; //remainder
- //run the calculations for 16 iterations.
- for(i=0;i<16;i=i+1) begin
- right = {q,r[17],1'b1};
- left = {r[15:0],a[31:30]};
- a = {a[29:0],2'b00}; //left shift by 2 bits.
- if (r[17] == 1) //add if r is negative
- r = left + right;
- else //subtract if r is positive
- r = left - right;
- q = {q[14:0],!r[17]};
- end
- sqrt = q; //final assignment of output.
-end
-endfunction //end of Function
-
-
-c version (from paper linked from URL)
-
-unsigned squart(D, r) /*Non-Restoring sqrt*/
- unsigned D; /*D:32-bit unsigned integer to be square rooted */
- int *r;
-{
- unsigned Q = 0; /*Q:16-bit unsigned integer (root)*/
- int R = 0; /*R:17-bit integer (remainder)*/
- int i;
- for (i = 15;i>=0;i--) /*for each root bit*/
- {
        if (R>=0)
        { /*new remainder:*/
            R = (R<<2)|((D>>(i+i))&3);
            R = R-((Q<<2)|1); /*-Q01*/
        }
        else
        { /*new remainder:*/
            R = (R<<2)|((D>>(i+i))&3);
            R = R+((Q<<2)|3); /*+Q11*/
        }
        if (R>=0) Q = (Q<<1)|1; /*new Q:*/
        else Q = (Q<<1)|0; /*new Q:*/
- }
-
- /*remainder adjusting*/
- if (R<0) R = R+((Q<<1)|1);
- *r = R; /*return remainder*/
- return(Q); /*return root*/
-}
-
-From wikipedia page:
-
-short isqrt(short num) {
- short res = 0;
- short bit = 1 << 14; // The second-to-top bit is set: 1 << 30 for 32 bits
-
- // "bit" starts at the highest power of four <= the argument.
- while (bit > num)
- bit >>= 2;
-
- while (bit != 0) {
- if (num >= res + bit) {
- num -= res + bit;
- res = (res >> 1) + bit;
- }
- else
- res >>= 1;
- bit >>= 2;
- }
- return res;
-}
-
-"""
+++ /dev/null
-from nmigen import Signal, Cat, Const, Mux, Module, Array
-from nmigen.cli import main, verilog
-
-from nmigen_add_experiment import FPADD
-from rstation_row import ReservationStationRow
-
-from math import log
-
class FunctionUnit:

    def __init__(self, width, num_units):
        """ Function Unit

        * width: bit-width of IEEE754. supported: 16, 32, 64
        * num_units: number of Reservation Stations
        """
        self.width = width

        fus = []
        bsz = int(log(width) / log(2))  # bits needed to number the rows
        for i in range(num_units):
            mid = Const(i, bsz)  # each row gets a constant mux id
            rs = ReservationStationRow(width, mid)
            rs.name = "RS%d" % i
            fus.append(rs)
        self.fus = Array(fus)

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for ReservationStationRow
        """
        m = Module()
        # NOTE(review): the rows in self.fus are never added as
        # submodules or wired up -- this elaborate() is an empty stub.
        return m
-
-
if __name__ == "__main__":
    # BUG FIX: both statements were missing their closing parenthesis
    # (SyntaxError), and main() was called on an undefined name "alu" --
    # the object constructed here is "rs".
    rs = ReservationStationRow(width=32, id_wid=Const(1, 4))
    main(rs, ports=[rs.in_a, rs.in_b, rs.out_z])

    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #    ports=alu.in_a.ports() + \
    #          alu.in_b.ports() + \
    #          alu.out_z.ports())
+++ /dev/null
-from nmigen import Module, Signal, Cat, Array, Const
-from nmigen.lib.coding import PriorityEncoder
-from math import log
-
-from fpbase import Trigger
-
-
class FPGetSyncOpsMod:
    """Gathers num_ops operands, each with its own strobe bit.

    ready goes high only when *every* strobe bit is set; the operands
    are passed through combinatorially once ack is also asserted.
    """
    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        inops = []
        outops = []
        for i in range(num_ops):
            inops.append(Signal(width, reset_less=True))
            outops.append(Signal(width, reset_less=True))
        self.in_op = inops    # per-operand inputs
        self.out_op = outops  # per-operand outputs
        self.stb = Signal(num_ops)  # one strobe bit per operand
        self.ack = Signal()         # downstream acknowledge
        self.ready = Signal(reset_less=True)       # all strobes set
        self.out_decode = Signal(reset_less=True)  # ready AND acked

    def elaborate(self, platform):
        m = Module()
        # Const(-1, ...) is the all-1s pattern: every strobe must be set
        m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
        m.d.comb += self.out_decode.eq(self.ack & self.ready)
        with m.If(self.out_decode):
            for i in range(self.num_ops):
                m.d.comb += [
                    self.out_op[i].eq(self.in_op[i]),
                ]
        return m

    def ports(self):
        return self.in_op + self.out_op + [self.stb, self.ack]
-
-
class FPOps(Trigger):
    """Trigger-synchronised bundle of num_ops operand signals."""
    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        res = []
        for i in range(num_ops):
            res.append(Signal(width))
        self.v = Array(res)  # the operand values

    def ports(self):
        # operand values plus the ack/stb handshake from Trigger
        res = []
        for i in range(self.num_ops):
            res.append(self.v[i])
        res.append(self.ack)
        res.append(self.stb)
        return res
-
-
class InputGroup:
    """Fan-in of num_rows operand-gathering Reservation Stations.

    A priority encoder picks the lowest-numbered row whose operands are
    all present; its operands are latched to out_op together with the
    row number (mid) so results can later be routed back.
    """
    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        self.mmax = int(log(self.num_rows) / log(2))  # bits in the row id
        self.rs = []
        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
        for i in range(num_rows):
            self.rs.append(FPGetSyncOpsMod(width, num_ops))
        self.rs = Array(self.rs)

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one "ready" bit per row
        in_ready = []
        for i in range(self.num_rows):
            in_ready.append(self.rs[i].ready)
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n)  # encoder active (some row is ready)
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]  # the winning (lowest-index ready) row
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # acks all default to zero
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
-
-
+++ /dev/null
-""" IO Control API
-
- Associated development bugs:
- * http://bugs.libre-riscv.org/show_bug.cgi?id=64
- * http://bugs.libre-riscv.org/show_bug.cgi?id=57
-
- Stage API:
- ---------
-
- stage requires compliance with a strict API that may be
- implemented in several means, including as a static class.
-
- Stages do not HOLD data, and they definitely do not contain
- signalling (ready/valid). They do however specify the FORMAT
- of the incoming and outgoing data, and they provide a means to
- PROCESS that data (from incoming format to outgoing format).
-
- Stage Blocks really must be combinatorial blocks. It would be ok
- to have input come in from sync'd sources (clock-driven) however by
- doing so they would no longer be deterministic, and chaining such
- blocks with such side-effects together could result in unexpected,
    unpredictable, unreproducible behaviour.
    So it is generally to be avoided, unless you know what you are doing.
-
- the methods of a stage instance must be as follows:
-
- * ispec() - Input data format specification. Takes a bit of explaining.
- The requirements are: something that eventually derives from
- nmigen Value must be returned *OR* an iterator or iterable
- or sequence (list, tuple etc.) or generator must *yield*
- thing(s) that (eventually) derive from the nmigen Value class.
-
- Complex to state, very simple in practice:
- see test_buf_pipe.py for over 25 worked examples.
-
- * ospec() - Output data format specification.
- format requirements identical to ispec.
-
- * process(m, i) - Optional function for processing ispec-formatted data.
- returns a combinatorial block of a result that
- may be assigned to the output, by way of the "nmoperator.eq"
- function. Note that what is returned here can be
- extremely flexible. Even a dictionary can be returned
- as long as it has fields that match precisely with the
- Record into which its values is intended to be assigned.
- Again: see example unit tests for details.
-
- * setup(m, i) - Optional function for setting up submodules.
- may be used for more complex stages, to link
- the input (i) to submodules. must take responsibility
- for adding those submodules to the module (m).
- the submodules must be combinatorial blocks and
- must have their inputs and output linked combinatorially.
-
- Both StageCls (for use with non-static classes) and Stage (for use
- by static classes) are abstract classes from which, for convenience
- and as a courtesy to other developers, anything conforming to the
- Stage API may *choose* to derive. See Liskov Substitution Principle:
- https://en.wikipedia.org/wiki/Liskov_substitution_principle
-
- StageChain:
- ----------
-
- A useful combinatorial wrapper around stages that chains them together
- and then presents a Stage-API-conformant interface. By presenting
- the same API as the stages it wraps, it can clearly be used recursively.
-
- ControlBase:
- -----------
-
- The base class for pipelines. Contains previous and next ready/valid/data.
- Also has an extremely useful "connect" function that can be used to
- connect a chain of pipelines and present the exact same prev/next
- ready/valid/data API.
-
- Note: pipelines basically do not become pipelines as such until
- handed to a derivative of ControlBase. ControlBase itself is *not*
- strictly considered a pipeline class. Wishbone and AXI4 (master or
- slave) could be derived from ControlBase, for example.
-"""
-
-from nmigen import Signal, Cat, Const, Module, Value, Elaboratable
-from nmigen.cli import verilog, rtlil
-from nmigen.hdl.rec import Record
-
-from collections.abc import Sequence, Iterable
-from collections import OrderedDict
-
-import nmoperator
-
-
class Object:
    """Ordered attribute bag conforming to the Stage data-spec protocol.

    Public (non-underscore, non-reserved) attribute assignments land in
    an OrderedDict so iteration order matches assignment order; nested
    iterables are flattened one level by __iter__.
    """
    def __init__(self):
        self.fields = OrderedDict()

    def __setattr__(self, k, v):
        print ("kv", k, v)
        # reserved / private names (and anything set before "fields"
        # exists) go through normal attribute assignment
        reserved = (k.startswith('_')
                    or k in ["fields", "name", "src_loc"]
                    or k in dir(Object)
                    or "fields" not in self.__dict__)
        if reserved:
            return object.__setattr__(self, k, v)
        self.fields[k] = v

    def __getattr__(self, k):
        if k in self.__dict__:
            return object.__getattr__(self, k)
        try:
            return self.fields[k]
        except KeyError as e:
            raise AttributeError(e)

    def __iter__(self):
        # OrderedDict, so insertion order is preserved; flatten one level
        for value in self.fields.values():
            if isinstance(value, Iterable):
                yield from value
            else:
                yield value

    def eq(self, inp):
        """Build a list of field-wise .eq() assignments against *inp*."""
        res = []
        for k, o in self.fields.items():
            i = getattr(inp, k)
            print ("eq", o, i)
            rres = o.eq(i)
            if isinstance(rres, Sequence):
                res.extend(rres)
            else:
                res.append(rres)
        print (res)
        return res

    def ports(self): # being called "keys" would be much better
        return list(self)
-
-
class RecordObject(Record):
    """nmigen Record whose layout grows as attributes are assigned.

    Each public attribute assignment both stores the value and appends a
    matching entry to the Record's layout, so the Record stays in sync
    with what was assigned.
    """
    def __init__(self, layout=None, name=None):
        Record.__init__(self, layout=layout or [], name=None)

    def __setattr__(self, k, v):
        #print (dir(Record))
        # reserved / private names take the normal assignment path
        if (k.startswith('_') or k in ["fields", "name", "src_loc"] or
           k in dir(Record) or "fields" not in self.__dict__):
            return object.__setattr__(self, k, v)
        self.fields[k] = v
        #print ("RecordObject setattr", k, v)
        # mirror the new field into the Record layout, deriving the
        # shape from the assigned value
        if isinstance(v, Record):
            newlayout = {k: (k, v.layout)}
        elif isinstance(v, Value):
            newlayout = {k: (k, v.shape())}
        else:
            newlayout = {k: (k, nmoperator.shape(v))}
        self.layout.fields.update(newlayout)

    def __iter__(self):
        for x in self.fields.values(): # remember: fields is an OrderedDict
            if isinstance(x, Iterable):
                yield from x           # a bit like flatten (nmigen.tools)
            else:
                yield x

    def ports(self): # would be better being called "keys"
        return list(self)
-
-
class PrevControl(Elaboratable):
    """ contains signals that come *from* the previous stage (both in and out)
        * valid_i: previous stage indicating all incoming data is valid.
                   may be a multi-bit signal, where all bits are required
                   to be asserted to indicate "valid".
        * ready_o: output to next stage indicating readiness to accept data
        * data_i : an input - MUST be added by the USER of this class
    """

    def __init__(self, i_width=1, stage_ctl=False):
        self.stage_ctl = stage_ctl
        self.valid_i = Signal(i_width, name="p_valid_i") # prev   >>in  self
        self._ready_o = Signal(name="p_ready_o") # prev   <<out self
        self.data_i = None # XXX MUST BE ADDED BY USER
        if stage_ctl:
            # stage-controlled readiness: the stage itself may gate ready
            self.s_ready_o = Signal(name="p_s_o_rdy") # prev   <<out self
        self.trigger = Signal(reset_less=True)  # valid AND ready, this cycle

    @property
    def ready_o(self):
        """ public-facing API: indicates (externally) that stage is ready
        """
        if self.stage_ctl:
            return self.s_ready_o # set dynamically by stage
        return self._ready_o      # return this when not under dynamic control

    def _connect_in(self, prev, direct=False, fn=None, do_data=True):
        """ internal helper function to connect stage to an input source.
            do not use to connect stage-to-stage!

            direct=True uses raw valid_i rather than the gated test;
            fn, if given, transforms the incoming data before assignment.
        """
        valid_i = prev.valid_i if direct else prev.valid_i_test
        res = [self.valid_i.eq(valid_i),
               prev.ready_o.eq(self.ready_o)]
        if do_data is False:
            return res
        data_i = fn(prev.data_i) if fn is not None else prev.data_i
        return res + [nmoperator.eq(self.data_i, data_i)]

    @property
    def valid_i_test(self):
        vlen = len(self.valid_i)
        if vlen > 1:
            # multi-bit case: valid only when valid_i is all 1s
            all1s = Const(-1, (len(self.valid_i), False))
            valid_i = (self.valid_i == all1s)
        else:
            # single-bit valid_i case
            valid_i = self.valid_i

        # when stage indicates not ready, incoming data
        # must "appear" to be not ready too
        if self.stage_ctl:
            valid_i = valid_i & self.s_ready_o

        return valid_i

    def elaborate(self, platform):
        m = Module()
        # handshake fires when data is (tested-)valid and we are ready
        m.d.comb += self.trigger.eq(self.valid_i_test & self.ready_o)
        return m

    def eq(self, i):
        # field-wise copy of another PrevControl's signals
        return [nmoperator.eq(self.data_i, i.data_i),
                self.ready_o.eq(i.ready_o),
                self.valid_i.eq(i.valid_i)]

    def __iter__(self):
        yield self.valid_i
        yield self.ready_o
        if hasattr(self.data_i, "ports"):
            yield from self.data_i.ports()
        elif isinstance(self.data_i, Sequence):
            yield from self.data_i
        else:
            yield self.data_i

    def ports(self):
        return list(self)
-
-
class NextControl(Elaboratable):
    """ contains the signals that go *to* the next stage (both in and out)
        * valid_o: output indicating to next stage that data is valid
        * ready_i: input from next stage indicating that it can accept data
        * data_o : an output - MUST be added by the USER of this class
    """
    def __init__(self, stage_ctl=False):
        self.stage_ctl = stage_ctl
        self.valid_o = Signal(name="n_valid_o") # self out>>  next
        self.ready_i = Signal(name="n_ready_i") # self <<in   next
        self.data_o = None # XXX MUST BE ADDED BY USER
        #if self.stage_ctl:
        self.d_valid = Signal(reset=1) # INTERNAL (data valid)
        self.trigger = Signal(reset_less=True)  # ready AND valid, this cycle

    @property
    def ready_i_test(self):
        # under stage control, downstream readiness is additionally
        # gated by the internal data-valid flag
        if self.stage_ctl:
            return self.ready_i & self.d_valid
        return self.ready_i

    def connect_to_next(self, nxt, do_data=True):
        """ helper function to connect to the next stage data/valid/ready.
            data/valid is passed *TO* nxt, and ready comes *IN* from nxt.
            use this when connecting stage-to-stage
        """
        res = [nxt.valid_i.eq(self.valid_o),
               self.ready_i.eq(nxt.ready_o)]
        if do_data:
            res.append(nmoperator.eq(nxt.data_i, self.data_o))
        return res

    def _connect_out(self, nxt, direct=False, fn=None, do_data=True):
        """ internal helper function to connect stage to an output source.
            do not use to connect stage-to-stage!

            direct=True uses raw ready_i rather than the gated test;
            fn, if given, transforms the outgoing data target.
        """
        ready_i = nxt.ready_i if direct else nxt.ready_i_test
        res = [nxt.valid_o.eq(self.valid_o),
               self.ready_i.eq(ready_i)]
        if not do_data:
            return res
        data_o = fn(nxt.data_o) if fn is not None else nxt.data_o
        return res + [nmoperator.eq(data_o, self.data_o)]

    def elaborate(self, platform):
        m = Module()
        # handshake fires when downstream is (tested-)ready and we are valid
        m.d.comb += self.trigger.eq(self.ready_i_test & self.valid_o)
        return m

    def __iter__(self):
        yield self.ready_i
        yield self.valid_o
        if hasattr(self.data_o, "ports"):
            yield from self.data_o.ports()
        elif isinstance(self.data_o, Sequence):
            yield from self.data_o
        else:
            yield self.data_o

    def ports(self):
        return list(self)
-
+++ /dev/null
-""" Combinatorial Multi-input and Multi-output multiplexer blocks
- conforming to Pipeline API
-
- Multi-input is complex because if any one input is ready, the output
- can be ready, and the decision comes from a separate module.
-
- Multi-output is simple (pretty much identical to UnbufferedPipeline),
- and the selection is just a mux. The only proviso (difference) being:
- the outputs not being selected have to have their ready_o signals
- DEASSERTED.
-"""
-
-from math import log
-from nmigen import Signal, Cat, Const, Mux, Module, Array, Elaboratable
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.coding import PriorityEncoder
-from nmigen.hdl.rec import Record, Layout
-from stageapi import _spec
-
-from collections.abc import Sequence
-
-from example_buf_pipe import eq, NextControl, PrevControl, ExampleStage
-
-
class MultiInControlBase(Elaboratable):
    """ Common functions for Pipeline API (multi-input variant)
    """
    def __init__(self, in_multi=None, p_len=1):
        """ Multi-input Control class.  Conforms to same API as ControlBase...
            mostly. has additional indices to the *multiple* input stages

            * p: contains ready/valid to the previous stages PLURAL
            * n: contains ready/valid to the next stage

            User must also:
            * add data_i members to PrevControl and
            * add data_o member to NextControl
        """
        # set up input and output IO ACK (prev/next ready/valid)
        p = []
        for i in range(p_len):
            p.append(PrevControl(in_multi))
        self.p = Array(p)
        self.n = NextControl()

    def connect_to_next(self, nxt, p_idx=0):
        """ helper function to connect to the next stage data/valid/ready.
        """
        return self.n.connect_to_next(nxt.p[p_idx])

    def _connect_in(self, prev, idx=0, prev_idx=None):
        """ helper function to connect stage to an input source. do not
            use to connect stage-to-stage!
        """
        if prev_idx is None:
            return self.p[idx]._connect_in(prev.p)
        return self.p[idx]._connect_in(prev.p[prev_idx])

    def _connect_out(self, nxt, idx=0, nxt_idx=None):
        """ helper function to connect stage to an output source. do not
            use to connect stage-to-stage!

            BUG FIX: the original body referenced ``nxt_idx`` which was
            not a parameter (NameError at runtime).  Accept it -- and
            ``idx`` for signature symmetry with MultiOutControlBase --
            as keyword arguments with backward-compatible defaults.
        """
        if nxt_idx is None:
            return self.n._connect_out(nxt.n)
        return self.n._connect_out(nxt.n[nxt_idx])

    def set_input(self, i, idx=0):
        """ helper function to set the input data
        """
        return eq(self.p[idx].data_i, i)

    def elaborate(self, platform):
        m = Module()
        # register each PrevControl (p0, p1, ...) and the single
        # NextControl as submodules so their trigger logic elaborates
        for i, p in enumerate(self.p):
            setattr(m.submodules, "p%d" % i, p)
        m.submodules.n = self.n
        return m

    def __iter__(self):
        for p in self.p:
            yield from p
        yield from self.n

    def ports(self):
        return list(self)
-
-
class MultiOutControlBase(Elaboratable):
    """ Common functions for Pipeline API (multi-output variant)
    """
    def __init__(self, n_len=1, in_multi=None):
        """ Multi-output Control class. Conforms to same API as ControlBase...
            mostly. has additional indices to the multiple *output* stages
            [MultiInControlBase has multiple *input* stages]

            * p: contains ready/valid to the previous stage
            * n: contains ready/valid to the next stages PLURAL

            User must also:
            * add data_i member to PrevControl and
            * add data_o members to NextControl
        """

        # set up input and output IO ACK (prev/next ready/valid)
        self.p = PrevControl(in_multi)
        n = []
        for i in range(n_len):
            n.append(NextControl())
        self.n = Array(n)

    def connect_to_next(self, nxt, n_idx=0):
        """ helper function to connect to the next stage data/valid/ready.
        """
        return self.n[n_idx].connect_to_next(nxt.p)

    def _connect_in(self, prev, idx=0):
        """ helper function to connect stage to an input source. do not
            use to connect stage-to-stage!

            BUG FIX: the input side of a multi-out block is the single
            PrevControl ``self.p`` -- NextControl has no ``_connect_in``
            method, so the original ``self.n[idx]._connect_in(...)``
            raised AttributeError.  ``idx`` is retained for API
            compatibility.
        """
        return self.p._connect_in(prev.p)

    def _connect_out(self, nxt, idx=0, nxt_idx=None):
        """ helper function to connect stage to an output source. do not
            use to connect stage-to-stage!
        """
        if nxt_idx is None:
            return self.n[idx]._connect_out(nxt.n)
        return self.n[idx]._connect_out(nxt.n[nxt_idx])

    def elaborate(self, platform):
        m = Module()
        # register the single PrevControl and each NextControl
        # (n0, n1, ...) as submodules so their trigger logic elaborates
        m.submodules.p = self.p
        for i, n in enumerate(self.n):
            setattr(m.submodules, "n%d" % i, n)
        return m

    def set_input(self, i):
        """ helper function to set the input data
        """
        return eq(self.p.data_i, i)

    def __iter__(self):
        yield from self.p
        for n in self.n:
            yield from n

    def ports(self):
        return list(self)
-
-
class CombMultiOutPipeline(MultiOutControlBase):
    """ A multi-input Combinatorial block conforming to the Pipeline API

        Attributes:
        -----------
        p.data_i : stage input data (non-array). shaped according to ispec
        n.data_o : stage output data array. shaped according to ospec
    """

    def __init__(self, stage, n_len, n_mux):
        MultiOutControlBase.__init__(self, n_len=n_len)
        self.stage = stage
        self.n_mux = n_mux  # selects which of the n outputs is active

        # set up the input and output data
        self.p.data_i = _spec(stage.ispec, 'data_i') # input type
        for i in range(n_len):
            name = 'data_o_%d' % i
            self.n[i].data_o = _spec(stage.ospec, name) # output type

    def process(self, i):
        # delegate to the stage's process() when it has one; identity otherwise
        if hasattr(self.stage, "process"):
            return self.stage.process(i)
        return i

    def elaborate(self, platform):
        m = MultiOutControlBase.elaborate(self, platform)

        if hasattr(self.n_mux, "elaborate"): # TODO: identify submodule?
            m.submodules += self.n_mux

        # need buffer register conforming to *input* spec
        r_data = _spec(self.stage.ispec, 'r_data') # input type
        if hasattr(self.stage, "setup"):
            self.stage.setup(m, r_data)

        # multiplexer id taken from n_mux
        mid = self.n_mux.m_id

        # temporaries
        p_valid_i = Signal(reset_less=True)  # gated input valid
        pv = Signal(reset_less=True)         # raw valid AND ready
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)
        m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o)

        # all outputs to next stages first initialised to zero (invalid)
        # the only output "active" is then selected by the muxid
        for i in range(len(self.n)):
            m.d.comb += self.n[i].valid_o.eq(0)
        data_valid = self.n[mid].valid_o
        m.d.comb += self.p.ready_o.eq(~data_valid | self.n[mid].ready_i)
        m.d.comb += data_valid.eq(p_valid_i | \
                                  (~self.n[mid].ready_i & data_valid))
        with m.If(pv):
            m.d.comb += eq(r_data, self.p.data_i)
        m.d.comb += eq(self.n[mid].data_o, self.process(r_data))

        return m
-
-
class CombMultiInPipeline(MultiInControlBase):
    """ A multi-input Combinatorial block conforming to the Pipeline API

        Attributes:
        -----------
        p.data_i : StageInput, shaped according to ispec
            The pipeline input
        p.data_o : StageOutput, shaped according to ospec
            The pipeline output
        r_data : input_shape according to ispec
            A temporary (buffered) copy of a prior (valid) input.
            This is HELD if the output is not ready.  It is updated
            SYNCHRONOUSLY.
    """

    def __init__(self, stage, p_len, p_mux):
        MultiInControlBase.__init__(self, p_len=p_len)
        self.stage = stage
        self.p_mux = p_mux  # arbiter deciding which input is active

        # set up the input and output data
        for i in range(p_len):
            name = 'data_i_%d' % i
            self.p[i].data_i = _spec(stage.ispec, name) # input type
        self.n.data_o = _spec(stage.ospec, 'data_o')

    def process(self, i):
        # delegate to the stage's process() when it has one; identity otherwise
        if hasattr(self.stage, "process"):
            return self.stage.process(i)
        return i

    def elaborate(self, platform):
        m = MultiInControlBase.elaborate(self, platform)

        m.submodules += self.p_mux

        # need an array of buffer registers conforming to *input* spec
        r_data = []
        data_valid = []
        p_valid_i = []
        n_ready_in = []
        p_len = len(self.p)
        for i in range(p_len):
            name = 'r_%d' % i
            r = _spec(self.stage.ispec, name) # input type
            r_data.append(r)
            data_valid.append(Signal(name="data_valid", reset_less=True))
            p_valid_i.append(Signal(name="p_valid_i", reset_less=True))
            n_ready_in.append(Signal(name="n_ready_in", reset_less=True))
            if hasattr(self.stage, "setup"):
                self.stage.setup(m, r)
        if len(r_data) > 1:
            r_data = Array(r_data)
            p_valid_i = Array(p_valid_i)
            n_ready_in = Array(n_ready_in)
            data_valid = Array(data_valid)

        nirn = Signal(reset_less=True)  # next stage NOT ready
        m.d.comb += nirn.eq(~self.n.ready_i)
        mid = self.p_mux.m_id  # currently-selected input index
        # defaults for every input; the selected one is overridden below
        for i in range(p_len):
            m.d.comb += data_valid[i].eq(0)
            m.d.comb += n_ready_in[i].eq(1)
            m.d.comb += p_valid_i[i].eq(0)
            m.d.comb += self.p[i].ready_o.eq(0)
        m.d.comb += p_valid_i[mid].eq(self.p_mux.active)
        m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i)
        m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid])
        # NOTE(review): this Signal is immediately overwritten by the
        # Cat() below and never used -- dead assignment (and its width
        # "i" is just the last loop value); looks like leftover code.
        anyvalid = Signal(i, reset_less=True)
        av = []
        for i in range(p_len):
            av.append(data_valid[i])
        anyvalid = Cat(*av)
        # output valid whenever any input's buffered data is valid
        m.d.comb += self.n.valid_o.eq(anyvalid.bool())
        m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \
                                    (n_ready_in[mid] & data_valid[mid]))

        # latch each input into its buffer register on its own handshake
        for i in range(p_len):
            vr = Signal(reset_less=True)
            m.d.comb += vr.eq(self.p[i].valid_i & self.p[i].ready_o)
            with m.If(vr):
                m.d.comb += eq(r_data[i], self.p[i].data_i)

        m.d.comb += eq(self.n.data_o, self.process(r_data[mid]))

        return m
-
-
class CombMuxOutPipe(CombMultiOutPipeline):
    """Multi-output pipeline where the stage itself supplies the mux id."""
    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.data_i.mid
-
-
-
class InputPriorityArbiter(Elaboratable):
    """ arbitration module for Input-Mux pipe, based on PriorityEncoder
    """
    def __init__(self, pipe, num_rows):
        self.pipe = pipe
        self.num_rows = num_rows
        self.mmax = int(log(self.num_rows) / log(2))  # bits in the mux id
        self.m_id = Signal(self.mmax, reset_less=True) # multiplex id
        self.active = Signal(reset_less=True)          # some input is valid

    def elaborate(self, platform):
        m = Module()

        assert len(self.pipe.p) == self.num_rows, \
               "must declare input to be same size"
        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe

        # connect priority encoder: one (gated) valid bit per input
        in_ready = []
        for i in range(self.num_rows):
            p_valid_i = Signal(reset_less=True)
            m.d.comb += p_valid_i.eq(self.pipe.p[i].valid_i_test)
            in_ready.append(p_valid_i)
        m.d.comb += pe.i.eq(Cat(*in_ready)) # array of input "valids"
        m.d.comb += self.active.eq(~pe.n)   # encoder active (one input valid)
        m.d.comb += self.m_id.eq(pe.o)      # output one active input

        return m

    def ports(self):
        return [self.m_id, self.active]
-
-
-
class PriorityCombMuxInPipe(CombMultiInPipeline):
    """ an example of how to use the combinatorial pipeline:
        a multi-input pipe whose arbiter is a priority encoder
        (lowest-numbered valid input wins).
    """

    def __init__(self, stage, p_len=2):
        p_mux = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_len, p_mux)
-
-
if __name__ == '__main__':

    # build a 2-input (default p_len) priority-mux pipeline around the
    # example stage and dump it as RTLIL for inspection
    dut = PriorityCombMuxInPipe(ExampleStage)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_combpipe.il", "w") as f:
        f.write(vl)
+++ /dev/null
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen.cli import main, verilog
-from fpadd.statemachine import FPADDBase, FPADD
-from fpadd.pipeline import FPADDMuxInOut
-
if __name__ == "__main__":
    # flip this to False to generate the single FPADDBase instead of the
    # full multi-row (reservation-station) adder
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #    ports=alu.in_a.ports() + \
    #          alu.in_b.ports() + \
    #          alu.out_z.ports())
+++ /dev/null
-# IEEE Floating Point Divider (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Const, Cat
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, Overflow, FPBase, FPState
-from singlepipe import eq
-
class Div:
    """Register set used by the FPDIV state machine's divide loop."""
    def __init__(self, width):
        self.width = width
        self.quot = Signal(width) # quotient
        self.dor = Signal(width)  # divisor
        self.dend = Signal(width) # dividend
        self.rem = Signal(width)  # remainder
        self.count = Signal(7)    # loop count

        self.czero = Const(0, width)  # constant zero at the working width

    def reset(self, m):
        """Synchronously clear quotient, remainder and loop counter."""
        m.d.sync += [
            self.quot.eq(self.czero),
            self.rem.eq(self.czero),
            self.count.eq(Const(0, 7))
        ]
-
-
-class FPDIV(FPBase):
-
- def __init__(self, width):
- FPBase.__init__(self)
- self.width = width
-
- self.in_a = FPOpIn(width)
- self.in_b = FPOpIn(width)
- self.out_z = FPOpOut(width)
-
- self.states = []
-
- def add_state(self, state):
- self.states.append(state)
- return state
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for FPDiv
- """
- m = Module()
-
- # Latches
- a = FPNumIn(None, self.width, False)
- b = FPNumIn(None, self.width, False)
- z = FPNumOut(self.width, False)
-
- div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky
-
- of = Overflow()
- m.submodules.in_a = a
- m.submodules.in_b = b
- m.submodules.z = z
- m.submodules.of = of
-
- m.d.comb += a.v.eq(self.in_a.v)
- m.d.comb += b.v.eq(self.in_b.v)
-
- with m.FSM() as fsm:
-
- # ******
- # gets operand a
-
- with m.State("get_a"):
- res = self.get_op(m, self.in_a, a, "get_b")
- m.d.sync += eq([a, self.in_a.ready_o], res)
-
- # ******
- # gets operand b
-
- with m.State("get_b"):
- res = self.get_op(m, self.in_b, b, "special_cases")
- m.d.sync += eq([b, self.in_b.ready_o], res)
-
- # ******
- # special cases: NaNs, infs, zeros, denormalised
- # NOTE: some of these are unique to div. see "Special Operations"
- # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
-
- with m.State("special_cases"):
-
- # if a is NaN or b is NaN return NaN
- with m.If(a.is_nan | b.is_nan):
- m.next = "put_z"
- m.d.sync += z.nan(1)
-
- # if a is Inf and b is Inf return NaN
- with m.Elif(a.is_inf & b.is_inf):
- m.next = "put_z"
- m.d.sync += z.nan(1)
-
- # if a is inf return inf (or NaN if b is zero)
- with m.Elif(a.is_inf):
- m.next = "put_z"
- m.d.sync += z.inf(a.s ^ b.s)
-
- # if b is inf return zero
- with m.Elif(b.is_inf):
- m.next = "put_z"
- m.d.sync += z.zero(a.s ^ b.s)
-
- # if a is zero return zero (or NaN if b is zero)
- with m.Elif(a.is_zero):
- m.next = "put_z"
- # if b is zero return NaN
- with m.If(b.is_zero):
- m.d.sync += z.nan(1)
- with m.Else():
- m.d.sync += z.zero(a.s ^ b.s)
-
- # if b is zero return Inf
- with m.Elif(b.is_zero):
- m.next = "put_z"
- m.d.sync += z.inf(a.s ^ b.s)
-
- # Denormalised Number checks
- with m.Else():
- m.next = "normalise_a"
- self.denormalise(m, a)
- self.denormalise(m, b)
-
- # ******
- # normalise_a
-
- with m.State("normalise_a"):
- self.op_normalise(m, a, "normalise_b")
-
- # ******
- # normalise_b
-
- with m.State("normalise_b"):
- self.op_normalise(m, b, "divide_0")
-
- # ******
- # First stage of divide. initialise state
-
- with m.State("divide_0"):
- m.next = "divide_1"
- m.d.sync += [
- z.s.eq(a.s ^ b.s), # sign
- z.e.eq(a.e - b.e), # exponent
- div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky
- div.dor.eq(b.m),
- ]
- div.reset(m)
-
- # ******
- # Second stage of divide.
-
- with m.State("divide_1"):
- m.next = "divide_2"
- m.d.sync += [
- div.quot.eq(div.quot << 1),
- div.rem.eq(Cat(div.dend[-1], div.rem[0:])),
- div.dend.eq(div.dend << 1),
- ]
-
- # ******
- # Third stage of divide.
- # This stage ends by jumping out to divide_3
- # However it defaults to jumping to divide_1 (which comes back here)
-
- with m.State("divide_2"):
- with m.If(div.rem >= div.dor):
- m.d.sync += [
- div.quot[0].eq(1),
- div.rem.eq(div.rem - div.dor),
- ]
- with m.If(div.count == div.width-2):
- m.next = "divide_3"
- with m.Else():
- m.next = "divide_1"
- m.d.sync += [
- div.count.eq(div.count + 1),
- ]
-
- # ******
- # Fourth stage of divide.
-
- with m.State("divide_3"):
- m.next = "normalise_1"
- m.d.sync += [
- z.m.eq(div.quot[3:]),
- of.guard.eq(div.quot[2]),
- of.round_bit.eq(div.quot[1]),
- of.sticky.eq(div.quot[0] | (div.rem != 0))
- ]
-
- # ******
- # First stage of normalisation.
-
- with m.State("normalise_1"):
- self.normalise_1(m, z, of, "normalise_2")
-
- # ******
- # Second stage of normalisation.
-
- with m.State("normalise_2"):
- self.normalise_2(m, z, of, "round")
-
- # ******
- # rounding stage
-
- with m.State("round"):
- self.roundz(m, z, of.roundz)
- m.next = "corrections"
-
- # ******
- # correction stage
-
- with m.State("corrections"):
- self.corrections(m, z, "pack")
-
- # ******
- # pack stage
-
- with m.State("pack"):
- self.pack(m, z, "put_z")
-
- # ******
- # put_z stage
-
- with m.State("put_z"):
- self.put_z(m, z, self.out_z, "get_a")
-
- return m
-
-
-if __name__ == "__main__":
- alu = FPDIV(width=32)
- main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
-
-
- # works... but don't use, just do "python fname.py convert -t v"
- #print (verilog.convert(alu, ports=[
- # ports=alu.in_a.ports() + \
- # alu.in_b.ports() + \
- # alu.out_z.ports())
+++ /dev/null
-""" nmigen operator functions / utils
-
- eq:
- --
-
- a strategically very important function that is identical in function
- to nmigen's Signal.eq function, except it may take objects, or a list
- of objects, or a tuple of objects, and where objects may also be
- Records.
-"""
-
-from nmigen import Signal, Cat, Const, Mux, Module, Value, Elaboratable
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.fifo import SyncFIFO, SyncFIFOBuffered
-from nmigen.hdl.ast import ArrayProxy
-from nmigen.hdl.rec import Record, Layout
-
-from abc import ABCMeta, abstractmethod
-from collections.abc import Sequence, Iterable
-from collections import OrderedDict
-from queue import Queue
-import inspect
-
-
-class Visitor2:
- """ a helper class for iterating twin-argument compound data structures.
-
- Record is a special (unusual, recursive) case, where the input may be
- specified as a dictionary (which may contain further dictionaries,
- recursively), where the field names of the dictionary must match
- the Record's field spec. Alternatively, an object with the same
- member names as the Record may be assigned: it does not have to
- *be* a Record.
-
- ArrayProxy is also special-cased, it's a bit messy: whilst ArrayProxy
- has an eq function, the object being assigned to it (e.g. a python
- object) might not. despite the *input* having an eq function,
- that doesn't help us, because it's the *ArrayProxy* that's being
- assigned to. so.... we cheat. use the ports() function of the
- python object, enumerate them, find out the list of Signals that way,
- and assign them.
- """
- def iterator2(self, o, i):
- if isinstance(o, dict):
- yield from self.dict_iter2(o, i)
-
- if not isinstance(o, Sequence):
- o, i = [o], [i]
- for (ao, ai) in zip(o, i):
- #print ("visit", fn, ao, ai)
- if isinstance(ao, Record):
- yield from self.record_iter2(ao, ai)
- elif isinstance(ao, ArrayProxy) and not isinstance(ai, Value):
- yield from self.arrayproxy_iter2(ao, ai)
- else:
- yield (ao, ai)
-
- def dict_iter2(self, o, i):
- for (k, v) in o.items():
- print ("d-iter", v, i[k])
- yield (v, i[k])
- return res
-
- def _not_quite_working_with_all_unit_tests_record_iter2(self, ao, ai):
- print ("record_iter2", ao, ai, type(ao), type(ai))
- if isinstance(ai, Value):
- if isinstance(ao, Sequence):
- ao, ai = [ao], [ai]
- for o, i in zip(ao, ai):
- yield (o, i)
- return
- for idx, (field_name, field_shape, _) in enumerate(ao.layout):
- if isinstance(field_shape, Layout):
- val = ai.fields
- else:
- val = ai
- if hasattr(val, field_name): # check for attribute
- val = getattr(val, field_name)
- else:
- val = val[field_name] # dictionary-style specification
- yield from self.iterator2(ao.fields[field_name], val)
-
- def record_iter2(self, ao, ai):
- for idx, (field_name, field_shape, _) in enumerate(ao.layout):
- if isinstance(field_shape, Layout):
- val = ai.fields
- else:
- val = ai
- if hasattr(val, field_name): # check for attribute
- val = getattr(val, field_name)
- else:
- val = val[field_name] # dictionary-style specification
- yield from self.iterator2(ao.fields[field_name], val)
-
- def arrayproxy_iter2(self, ao, ai):
- for p in ai.ports():
- op = getattr(ao, p.name)
- print ("arrayproxy - p", p, p.name)
- yield from self.iterator2(op, p)
-
-
-class Visitor:
- """ a helper class for iterating single-argument compound data structures.
- similar to Visitor2.
- """
- def iterate(self, i):
- """ iterate a compound structure recursively using yield
- """
- if not isinstance(i, Sequence):
- i = [i]
- for ai in i:
- #print ("iterate", ai)
- if isinstance(ai, Record):
- #print ("record", list(ai.layout))
- yield from self.record_iter(ai)
- elif isinstance(ai, ArrayProxy) and not isinstance(ai, Value):
- yield from self.array_iter(ai)
- else:
- yield ai
-
- def record_iter(self, ai):
- for idx, (field_name, field_shape, _) in enumerate(ai.layout):
- if isinstance(field_shape, Layout):
- val = ai.fields
- else:
- val = ai
- if hasattr(val, field_name): # check for attribute
- val = getattr(val, field_name)
- else:
- val = val[field_name] # dictionary-style specification
- #print ("recidx", idx, field_name, field_shape, val)
- yield from self.iterate(val)
-
- def array_iter(self, ai):
- for p in ai.ports():
- yield from self.iterate(p)
-
-
-def eq(o, i):
- """ makes signals equal: a helper routine which identifies if it is being
- passed a list (or tuple) of objects, or signals, or Records, and calls
- the objects' eq function.
- """
- res = []
- for (ao, ai) in Visitor2().iterator2(o, i):
- rres = ao.eq(ai)
- if not isinstance(rres, Sequence):
- rres = [rres]
- res += rres
- return res
-
-
-def shape(i):
- #print ("shape", i)
- r = 0
- for part in list(i):
- #print ("shape?", part)
- s, _ = part.shape()
- r += s
- return r, False
-
-
-def cat(i):
- """ flattens a compound structure recursively using Cat
- """
- from nmigen.tools import flatten
- #res = list(flatten(i)) # works (as of nmigen commit f22106e5) HOWEVER...
- res = list(Visitor().iterate(i)) # needed because input may be a sequence
- return Cat(*res)
-
-
+++ /dev/null
-""" Example 5: Making use of PyRTL and Introspection. """
-
-from collections.abc import Sequence
-
-from nmigen import Signal
-from nmigen.hdl.rec import Record
-from nmigen import tracer
-from nmigen.compat.fhdl.bitcontainer import value_bits_sign
-from contextlib import contextmanager
-
-from nmoperator import eq
-from singlepipe import StageCls, ControlBase, BufferedHandshake
-from singlepipe import UnbufferedPipeline
-
-
-# The following example shows how pyrtl can be used to make some interesting
-# hardware structures using python introspection. In particular, this example
-# makes a N-stage pipeline structure. Any specific pipeline is then a derived
-# class of SimplePipeline where methods with names starting with "stage" are
-# stages, and new members with names not starting with "_" are to be registered
-# for the next stage.
-
-def like(value, rname, pipe, pipemode=False):
- if isinstance(value, ObjectProxy):
- return ObjectProxy.like(pipe, value, pipemode=pipemode,
- name=rname, reset_less=True)
- else:
- return Signal(value_bits_sign(value), name=rname,
- reset_less=True)
- return Signal.like(value, name=rname, reset_less=True)
-
-def get_assigns(_assigns):
- assigns = []
- for e in _assigns:
- if isinstance(e, ObjectProxy):
- assigns += get_assigns(e._assigns)
- else:
- assigns.append(e)
- return assigns
-
-
-def get_eqs(_eqs):
- eqs = []
- for e in _eqs:
- if isinstance(e, ObjectProxy):
- eqs += get_eqs(e._eqs)
- else:
- eqs.append(e)
- return eqs
-
-
-class ObjectProxy:
- def __init__(self, m, name=None, pipemode=False, syncmode=True):
- self._m = m
- if name is None:
- name = tracer.get_var_name(default=None)
- self.name = name
- self._pipemode = pipemode
- self._syncmode = syncmode
- self._eqs = {}
- self._assigns = []
- self._preg_map = {}
-
- @classmethod
- def like(cls, m, value, pipemode=False, name=None, src_loc_at=0, **kwargs):
- name = name or tracer.get_var_name(depth=2 + src_loc_at,
- default="$like")
-
- src_loc_at_1 = 1 + src_loc_at
- r = ObjectProxy(m, value.name, pipemode)
- #for a, aname in value._preg_map.items():
- # r._preg_map[aname] = like(a, aname, m, pipemode)
- for a in value.ports():
- aname = a.name
- r._preg_map[aname] = like(a, aname, m, pipemode)
- return r
-
- def __repr__(self):
- subobjs = []
- for a in self.ports():
- aname = a.name
- ai = self._preg_map[aname]
- subobjs.append(repr(ai))
- return "<OP %s>" % subobjs
-
- def get_specs(self, liked=False):
- res = []
- for k, v in self._preg_map.items():
- #v = like(v, k, stage._m)
- res.append(v)
- if isinstance(v, ObjectProxy):
- res += v.get_specs()
- return res
-
- def eq(self, i):
- print ("ObjectProxy eq", self, i)
- res = []
- for a in self.ports():
- aname = a.name
- ai = i._preg_map[aname]
- res.append(a.eq(ai))
- return res
-
- def ports(self):
- res = []
- for aname, a in self._preg_map.items():
- if isinstance(a, Signal) or isinstance(a, ObjectProxy) or \
- isinstance(a, Record):
- res.append(a)
- #print ("ObjectPorts", res)
- return res
-
- def __getattr__(self, name):
- try:
- v = self._preg_map[name]
- return v
- #return like(v, name, self._m)
- except KeyError:
- raise AttributeError(
- 'error, no pipeline register "%s" defined for OP %s'
- % (name, self.name))
-
- def __setattr__(self, name, value):
- if name.startswith('_') or name in ['name', 'ports', 'eq', 'like']:
- # do not do anything tricky with variables starting with '_'
- object.__setattr__(self, name, value)
- return
- #rname = "%s_%s" % (self.name, name)
- rname = name
- new_pipereg = like(value, rname, self._m, self._pipemode)
- self._preg_map[name] = new_pipereg
- #object.__setattr__(self, name, new_pipereg)
- if self._pipemode:
- #print ("OP pipemode", self._syncmode, new_pipereg, value)
- assign = eq(new_pipereg, value)
- if self._syncmode:
- self._m.d.sync += assign
- else:
- self._m.d.comb += assign
- elif self._m:
- #print ("OP !pipemode assign", new_pipereg, value, type(value))
- self._m.d.comb += eq(new_pipereg, value)
- else:
- #print ("OP !pipemode !m", new_pipereg, value, type(value))
- self._assigns += eq(new_pipereg, value)
- if isinstance(value, ObjectProxy):
- #print ("OP, defer assigns:", value._assigns)
- self._assigns += value._assigns
- self._eqs.append(value._eqs)
-
-
-class PipelineStage:
- """ Pipeline builder stage with auto generation of pipeline registers.
- """
-
- def __init__(self, name, m, prev=None, pipemode=False, ispec=None):
- self._m = m
- self._stagename = name
- self._preg_map = {'__nextstage__': {}}
- self._prev_stage = prev
- self._ispec = ispec
- if ispec:
- self._preg_map[self._stagename] = ispec
- if prev:
- print ("prev", prev._stagename, prev._preg_map)
- #if prev._stagename in prev._preg_map:
- # m = prev._preg_map[prev._stagename]
- # self._preg_map[prev._stagename] = m
- if '__nextstage__' in prev._preg_map:
- m = prev._preg_map['__nextstage__']
- m = likedict(m)
- self._preg_map[self._stagename] = m
- #for k, v in m.items():
- #m[k] = like(v, k, self._m)
- print ("make current", self._stagename, m)
- self._pipemode = pipemode
- self._eqs = {}
- self._assigns = []
-
- def __getattribute__(self, name):
- if name.startswith('_'):
- return object.__getattribute__(self, name)
- #if name in self._preg_map['__nextstage__']:
- # return self._preg_map['__nextstage__'][name]
- try:
- print ("getattr", name, object.__getattribute__(self, '_preg_map'))
- v = self._preg_map[self._stagename][name]
- return v
- #return like(v, name, self._m)
- except KeyError:
- raise AttributeError(
- 'error, no pipeline register "%s" defined for stage %s'
- % (name, self._stagename))
-
- def __setattr__(self, name, value):
- if name.startswith('_'):
- # do not do anything tricky with variables starting with '_'
- object.__setattr__(self, name, value)
- return
- pipereg_id = self._stagename
- rname = 'pipereg_' + pipereg_id + '_' + name
- new_pipereg = like(value, rname, self._m, self._pipemode)
- next_stage = '__nextstage__'
- if next_stage not in self._preg_map:
- self._preg_map[next_stage] = {}
- self._preg_map[next_stage][name] = new_pipereg
- print ("setattr", name, value, self._preg_map)
- if self._pipemode:
- self._eqs[name] = new_pipereg
- assign = eq(new_pipereg, value)
- print ("pipemode: append", new_pipereg, value, assign)
- if isinstance(value, ObjectProxy):
- print ("OP, assigns:", value._assigns)
- self._assigns += value._assigns
- self._eqs[name]._eqs = value._eqs
- #self._m.d.comb += assign
- self._assigns += assign
- elif self._m:
- print ("!pipemode: assign", new_pipereg, value)
- assign = eq(new_pipereg, value)
- self._m.d.sync += assign
- else:
- print ("!pipemode !m: defer assign", new_pipereg, value)
- assign = eq(new_pipereg, value)
- self._eqs[name] = new_pipereg
- self._assigns += assign
- if isinstance(value, ObjectProxy):
- print ("OP, defer assigns:", value._assigns)
- self._assigns += value._assigns
- self._eqs[name]._eqs = value._eqs
-
-def likelist(specs):
- res = []
- for v in specs:
- res.append(like(v, v.name, None, pipemode=True))
- return res
-
-def likedict(specs):
- if not isinstance(specs, dict):
- return like(specs, specs.name, None, pipemode=True)
- res = {}
- for k, v in specs.items():
- res[k] = likedict(v)
- return res
-
-
-class AutoStage(StageCls):
- def __init__(self, inspecs, outspecs, eqs, assigns):
- self.inspecs, self.outspecs = inspecs, outspecs
- self.eqs, self.assigns = eqs, assigns
- #self.o = self.ospec()
- def ispec(self): return likedict(self.inspecs)
- def ospec(self): return likedict(self.outspecs)
-
- def process(self, i):
- print ("stage process", i)
- return self.eqs
-
- def setup(self, m, i):
- print ("stage setup i", i, m)
- print ("stage setup inspecs", self.inspecs)
- print ("stage setup outspecs", self.outspecs)
- print ("stage setup eqs", self.eqs)
- #self.o = self.ospec()
- m.d.comb += eq(self.inspecs, i)
- #m.d.comb += eq(self.outspecs, self.eqs)
- #m.d.comb += eq(self.o, i)
-
-
-class AutoPipe(UnbufferedPipeline):
- def __init__(self, stage, assigns):
- UnbufferedPipeline.__init__(self, stage)
- self.assigns = assigns
-
- def elaborate(self, platform):
- m = UnbufferedPipeline.elaborate(self, platform)
- m.d.comb += self.assigns
- print ("assigns", self.assigns, m)
- return m
-
-
-class PipeManager:
- def __init__(self, m, pipemode=False, pipetype=None):
- self.m = m
- self.pipemode = pipemode
- self.pipetype = pipetype
-
- @contextmanager
- def Stage(self, name, prev=None, ispec=None):
- if ispec:
- ispec = likedict(ispec)
- print ("start stage", name, ispec)
- stage = PipelineStage(name, None, prev, self.pipemode, ispec=ispec)
- try:
- yield stage, self.m #stage._m
- finally:
- pass
- if self.pipemode:
- if stage._ispec:
- print ("use ispec", stage._ispec)
- inspecs = stage._ispec
- else:
- inspecs = self.get_specs(stage, name)
- #inspecs = likedict(inspecs)
- outspecs = self.get_specs(stage, '__nextstage__', liked=True)
- print ("stage inspecs", name, inspecs)
- print ("stage outspecs", name, outspecs)
- eqs = stage._eqs # get_eqs(stage._eqs)
- assigns = get_assigns(stage._assigns)
- print ("stage eqs", name, eqs)
- print ("stage assigns", name, assigns)
- s = AutoStage(inspecs, outspecs, eqs, assigns)
- self.stages.append(s)
- print ("end stage", name, self.pipemode, "\n")
-
- def get_specs(self, stage, name, liked=False):
- return stage._preg_map[name]
- if name in stage._preg_map:
- res = []
- for k, v in stage._preg_map[name].items():
- #v = like(v, k, stage._m)
- res.append(v)
- #if isinstance(v, ObjectProxy):
- # res += v.get_specs()
- return res
- return {}
-
- def __enter__(self):
- self.stages = []
- return self
-
- def __exit__(self, *args):
- print ("exit stage", args)
- pipes = []
- cb = ControlBase()
- for s in self.stages:
- print ("stage specs", s, s.inspecs, s.outspecs)
- if self.pipetype == 'buffered':
- p = BufferedHandshake(s)
- else:
- p = AutoPipe(s, s.assigns)
- pipes.append(p)
- self.m.submodules += p
-
- self.m.d.comb += cb.connect(pipes)
-
-
-class SimplePipeline:
- """ Pipeline builder with auto generation of pipeline registers.
- """
-
- def __init__(self, m):
- self._m = m
- self._pipeline_register_map = {}
- self._current_stage_num = 0
-
- def _setup(self):
- stage_list = []
- for method in dir(self):
- if method.startswith('stage'):
- stage_list.append(method)
- for stage in sorted(stage_list):
- stage_method = getattr(self, stage)
- stage_method()
- self._current_stage_num += 1
-
- def __getattr__(self, name):
- try:
- return self._pipeline_register_map[self._current_stage_num][name]
- except KeyError:
- raise AttributeError(
- 'error, no pipeline register "%s" defined for stage %d'
- % (name, self._current_stage_num))
-
- def __setattr__(self, name, value):
- if name.startswith('_'):
- # do not do anything tricky with variables starting with '_'
- object.__setattr__(self, name, value)
- return
- next_stage = self._current_stage_num + 1
- pipereg_id = str(self._current_stage_num) + 'to' + str(next_stage)
- rname = 'pipereg_' + pipereg_id + '_' + name
- #new_pipereg = Signal(value_bits_sign(value), name=rname,
- # reset_less=True)
- if isinstance(value, ObjectProxy):
- new_pipereg = ObjectProxy.like(self._m, value,
- name=rname, reset_less = True)
- else:
- new_pipereg = Signal.like(value, name=rname, reset_less = True)
- if next_stage not in self._pipeline_register_map:
- self._pipeline_register_map[next_stage] = {}
- self._pipeline_register_map[next_stage][name] = new_pipereg
- self._m.d.sync += eq(new_pipereg, value)
-
+++ /dev/null
-""" Example 5: Making use of PyRTL and Introspection. """
-
-from nmigen import Module, Signal, Const
-from nmigen.cli import main, verilog, rtlil
-
-
-from pipeline import SimplePipeline, ObjectProxy, PipeManager
-
-
-class SimplePipelineExample(SimplePipeline):
- """ A very simple pipeline to show how registers are inferred. """
-
- def __init__(self, pipe):
- SimplePipeline.__init__(self, pipe)
- self._loopback = Signal(4)
- self._setup()
-
- def stage0(self):
- self.n = ~self._loopback
-
- def stage1(self):
- self.n = self.n + 2
-
- def stage2(self):
- localv = Signal(4)
- self._pipe.comb += localv.eq(2)
- self.n = self.n << localv
-
- def stage3(self):
- self.n = ~self.n
-
- def stage4(self):
- self._pipe.sync += self._loopback.eq(self.n + 3)
-
-
-class ObjectBasedPipelineExample(SimplePipeline):
- """ A very simple pipeline to show how registers are inferred. """
-
- def __init__(self, m):
- SimplePipeline.__init__(self, m)
- self._loopback = Signal(4)
- o = ObjectProxy(m)
- o.a = Signal(4)
- o.b = Signal(4)
- self._obj = o
- self._setup()
-
- def stage0(self):
- self.n = ~self._loopback
- self.o = self._obj
-
- def stage1(self):
- self.n = self.n + self.o.a
- o = ObjectProxy(self._m)
- o.c = self.n
- o.d = self.o.b + self.n + Const(5)
- self.o = o
-
- def stage2(self):
- localv = Signal(4)
- self._m.d.comb += localv.eq(2)
- self.n = self.n << localv
- o = ObjectProxy(self._m)
- o.e = self.n + self.o.c + self.o.d
- self.o = o
-
- def stage3(self):
- self.n = ~self.n
- self.o = self.o
- self.o.e = self.o.e + self.n
-
- def stage4(self):
- self._m.d.sync += self._loopback.eq(self.n + 3 + self.o.e)
-
-
-class PipeModule:
-
- def __init__(self):
- self.m = Module()
- self.p = ObjectBasedPipelineExample(self.m)
-
- def elaborate(self, platform=None):
- return self.m
-
-
-class PipelineStageExample:
-
- def __init__(self):
- self._loopback = Signal(4, name="loopback")
-
- def elaborate(self, platform=None):
-
- m = Module()
-
- with PipeManager(m, pipemode=True) as pipe:
-
- ispec={'loopback': self._loopback}
- with pipe.Stage("first", ispec=ispec) as (p, m):
- p.n = ~p.loopback
- with pipe.Stage("second", p) as (p, m):
- #p.n = ~self._loopback + 2
- p.n = p.n + Const(2)
- with pipe.Stage("third", p) as (p, m):
- #p.n = ~self._loopback + 5
- localv = Signal(4)
- m.d.comb += localv.eq(2)
- p.n = p.n << localv + Const(1)
- #p.m = p.n + 2
-
- print (pipe.stages)
-
- return m
-
-class PipelineStageObjectExample:
-
- def __init__(self):
- self.loopback = Signal(4)
-
- def elaborate(self, platform=None):
-
- m = Module()
-
- o = ObjectProxy(None, pipemode=False)
- o.a = Signal(4)
- o.b = Signal(4)
- self.obj = o
-
- localv2 = Signal(4)
- m.d.sync += localv2.eq(localv2 + 3)
-
- #m.d.comb += self.obj.a.eq(localv2 + 1)
- #m.d.sync += self._loopback.eq(localv2)
-
- ispec= {'loopback': self.loopback, 'obj': self.obj}
- with PipeManager(m, pipemode=True) as pipe:
-
- with pipe.Stage("first", ispec=ispec) as (p, m):
- p.n = ~p.loopback
- p.o = p.obj
- with pipe.Stage("second", p) as (p, m):
- #p.n = ~self.loopback + 2
- localn = Signal(4)
- m.d.comb += localn.eq(p.n)
- o = ObjectProxy(None, pipemode=False)
- o.c = localn
- o.d = p.o.b + localn + Const(5)
- p.n = localn
- p.o = o
- with pipe.Stage("third", p) as (p, m):
- #p.n = ~self._loopback + 5
- localv = Signal(4)
- m.d.comb += localv.eq(2)
- p.n = p.n << localv
- o = ObjectProxy(None, pipemode=False)
- o.e = p.n + p.o.c + p.o.d
- p.o = o
-
- print ("stages", pipe.stages)
-
- return m
-
-
-class PipelineStageObjectExample2:
-
- def __init__(self):
- self._loopback = Signal(4)
-
- def elaborate(self, platform=None):
-
- m = Module()
-
- ispec= [self._loopback]
- with PipeManager(m, pipemode=True) as pipe:
-
- with pipe.Stage("first",
- ispec=ispec) as (p, m):
- p.n = ~self._loopback
- o = ObjectProxy(None, pipemode=False)
- o.b = ~self._loopback + Const(5)
- p.o = o
-
- print ("stages", pipe.stages)
-
- return m
-
-
-
-if __name__ == "__main__":
- example = PipeModule()
- with open("pipe_module.il", "w") as f:
- f.write(rtlil.convert(example, ports=[
- example.p._loopback,
- ]))
- example = PipelineStageExample()
- with open("pipe_stage_module.il", "w") as f:
- f.write(rtlil.convert(example, ports=[
- example._loopback,
- ]))
- #exit(0)
- example = PipelineStageObjectExample()
- with open("pipe_stage_object_module.il", "w") as f:
- f.write(rtlil.convert(example, ports=[
- example.loopback,
- ]))
+++ /dev/null
-# Copyright (c) 2014 - 2019 The Regents of the University of
-# California (Regents). All Rights Reserved. Redistribution and use in
-# source and binary forms, with or without modification, are permitted
-# provided that the following conditions are met:
-# * Redistributions of source code must retain the above
-# copyright notice, this list of conditions and the following
-# two paragraphs of disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# two paragraphs of disclaimer in the documentation and/or other materials
-# provided with the distribution.
-# * Neither the name of the Regents nor the names of its contributors
-# may be used to endorse or promote products derived from this
-# software without specific prior written permission.
-# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
-# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
-# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
-# REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF
-# ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION
-# TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
-# MODIFICATIONS.
-
-from nmigen import Module, Signal, Memory, Mux, Elaboratable
-from nmigen.tools import bits_for
-from nmigen.cli import main
-from nmigen.lib.fifo import FIFOInterface
-
-# translated from https://github.com/freechipsproject/chisel3/blob/a4a29e29c3f1eed18f851dcf10bdc845571dfcb6/src/main/scala/chisel3/util/Decoupled.scala#L185 # noqa
-
-
-class Queue(FIFOInterface, Elaboratable):
- def __init__(self, width, depth, fwft=True, pipe=False):
- """ Queue (FIFO) with pipe mode and first-write fall-through capability
-
- * :width: width of Queue data in/out
- * :depth: queue depth. NOTE: may be set to 0 (this is ok)
- * :fwft : first-write, fall-through mode (Chisel Queue "flow" mode)
- * :pipe : pipe mode. NOTE: this mode can cause unanticipated
- problems. when read is enabled, so is writeable.
- therefore if read is enabled, the data ABSOLUTELY MUST
- be read.
-
- fwft mode = True basically means that the data may be transferred
- combinatorially from input to output.
-
- Attributes:
- * level: available free space (number of unread entries)
-
- din = enq_data, writable = enq_ready, we = enq_valid
- dout = deq_data, re = deq_ready, readable = deq_valid
- """
- FIFOInterface.__init__(self, width, depth, fwft)
- self.pipe = pipe
- self.depth = depth
- self.level = Signal(bits_for(depth))
-
- def elaborate(self, platform):
- m = Module()
-
- # set up an SRAM. XXX bug in Memory: cannot create SRAM of depth 1
- ram = Memory(self.width, self.depth if self.depth > 1 else 2)
- m.submodules.ram_read = ram_read = ram.read_port(synchronous=False)
- m.submodules.ram_write = ram_write = ram.write_port()
-
- # convenience names
- p_ready_o = self.writable
- p_valid_i = self.we
- enq_data = self.din
-
- n_valid_o = self.readable
- n_ready_i = self.re
- deq_data = self.dout
-
- # intermediaries
- ptr_width = bits_for(self.depth - 1) if self.depth > 1 else 0
- enq_ptr = Signal(ptr_width) # cyclic pointer to "insert" point (wrport)
- deq_ptr = Signal(ptr_width) # cyclic pointer to "remove" point (rdport)
- maybe_full = Signal() # not reset_less (set by sync)
-
- # temporaries
- do_enq = Signal(reset_less=True)
- do_deq = Signal(reset_less=True)
- ptr_diff = Signal(ptr_width)
- ptr_match = Signal(reset_less=True)
- empty = Signal(reset_less=True)
- full = Signal(reset_less=True)
- enq_max = Signal(reset_less=True)
- deq_max = Signal(reset_less=True)
-
- m.d.comb += [ptr_match.eq(enq_ptr == deq_ptr), # read-ptr = write-ptr
- ptr_diff.eq(enq_ptr - deq_ptr),
- enq_max.eq(enq_ptr == self.depth - 1),
- deq_max.eq(deq_ptr == self.depth - 1),
- empty.eq(ptr_match & ~maybe_full),
- full.eq(ptr_match & maybe_full),
- do_enq.eq(p_ready_o & p_valid_i), # write conditions ok
- do_deq.eq(n_ready_i & n_valid_o), # read conditions ok
-
- # set readable and writable (NOTE: see pipe mode below)
- n_valid_o.eq(~empty), # cannot read if empty!
- p_ready_o.eq(~full), # cannot write if full!
-
- # set up memory and connect to input and output
- ram_write.addr.eq(enq_ptr),
- ram_write.data.eq(enq_data),
- ram_write.en.eq(do_enq),
- ram_read.addr.eq(deq_ptr),
- deq_data.eq(ram_read.data) # NOTE: overridden in fwft mode
- ]
-
- # under write conditions, SRAM write-pointer moves on next clock
- with m.If(do_enq):
- m.d.sync += enq_ptr.eq(Mux(enq_max, 0, enq_ptr+1))
-
- # under read conditions, SRAM read-pointer moves on next clock
- with m.If(do_deq):
- m.d.sync += deq_ptr.eq(Mux(deq_max, 0, deq_ptr+1))
-
- # if read-but-not-write or write-but-not-read, maybe_full set
- with m.If(do_enq != do_deq):
- m.d.sync += maybe_full.eq(do_enq)
-
- # first-word fall-through: same as "flow" parameter in Chisel3 Queue
- # basically instead of relying on the Memory characteristics (which
- # in FPGAs do not have write-through), then when the queue is empty
- # take the output directly from the input, i.e. *bypass* the SRAM.
- # this done combinatorially to give the exact same characteristics
- # as Memory "write-through"... without relying on a changing API
- if self.fwft:
- with m.If(p_valid_i):
- m.d.comb += n_valid_o.eq(1)
- with m.If(empty):
- m.d.comb += deq_data.eq(enq_data)
- m.d.comb += do_deq.eq(0)
- with m.If(n_ready_i):
- m.d.comb += do_enq.eq(0)
-
- # pipe mode: if next stage says it's ready (readable), we
- # *must* declare the input ready (writeable).
- if self.pipe:
- with m.If(n_ready_i):
- m.d.comb += p_ready_o.eq(1)
-
- # set the count (available free space), optimise on power-of-two
- if self.depth == 1 << ptr_width: # is depth a power of 2
- m.d.comb += self.level.eq(
- Mux(maybe_full & ptr_match, self.depth, 0) | ptr_diff)
- else:
- m.d.comb += self.level.eq(Mux(ptr_match,
- Mux(maybe_full, self.depth, 0),
- Mux(deq_ptr > enq_ptr,
- self.depth + ptr_diff,
- ptr_diff)))
-
- return m
-
-
-if __name__ == "__main__":
- reg_stage = Queue(1, 1, pipe=True)
- break_ready_chain_stage = Queue(1, 1, pipe=True, fwft=True)
- m = Module()
- ports = []
-
- def queue_ports(queue, name_prefix):
- retval = []
- for name in ["level",
- "dout",
- "readable",
- "writable"]:
- port = getattr(queue, name)
- signal = Signal(port.shape(), name=name_prefix+name)
- m.d.comb += signal.eq(port)
- retval.append(signal)
- for name in ["re",
- "din",
- "we"]:
- port = getattr(queue, name)
- signal = Signal(port.shape(), name=name_prefix+name)
- m.d.comb += port.eq(signal)
- retval.append(signal)
- return retval
-
- m.submodules.reg_stage = reg_stage
- ports += queue_ports(reg_stage, "reg_stage_")
- m.submodules.break_ready_chain_stage = break_ready_chain_stage
- ports += queue_ports(break_ready_chain_stage, "break_ready_chain_stage_")
- main(m, ports=ports)
+++ /dev/null
-from nmigen import Module, Signal, Mux, Const, Elaboratable
-from nmigen.hdl.rec import Record, Layout, DIR_NONE
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen.compat.fhdl.bitcontainer import value_bits_sign
-from singlepipe import cat, RecordObject
-
-
-class RecordTest:
-
- def __init__(self):
- self.r1 = RecordObject()
- self.r1.sig1 = Signal(16)
- self.r1.r2 = RecordObject()
- self.r1.r2.sig2 = Signal(16)
- self.r1.r3 = RecordObject()
- self.r1.r3.sig3 = Signal(16)
- self.sig123 = Signal(48)
-
- def elaborate(self, platform):
- m = Module()
-
- sig1 = Signal(16)
- m.d.comb += sig1.eq(self.r1.sig1)
- sig2 = Signal(16)
- m.d.comb += sig2.eq(self.r1.r2.sig2)
-
- print (self.r1.fields)
- print (self.r1.shape())
- print ("width", len(self.r1))
- m.d.comb += self.sig123.eq(cat(self.r1))
-
- return m
-
-
-def testbench(dut):
- yield dut.r1.sig1.eq(5)
- yield dut.r1.r2.sig2.eq(10)
- yield dut.r1.r3.sig3.eq(1)
-
- sig1 = yield dut.r1.sig1
- assert sig1 == 5
- sig2 = yield dut.r1.r2.sig2
- assert sig2 == 10
-
- yield
-
- sig123 = yield dut.sig123
- print ("sig123", hex(sig123))
- assert sig123 == 0x1000a0005
-
-
-
-class RecordTest2(Elaboratable):
-
- def __init__(self):
- self.r1 = RecordObject()
- self.r1.sig1 = Signal(16)
- self.r1.r2 = RecordObject()
- self.r1.r2.sig2 = Signal(16)
- self.r1.r3 = RecordObject()
- self.r1.r3.sig3 = Signal(16)
- self.sig123 = Signal(48)
-
- def elaborate(self, platform):
- m = Module()
-
- m.d.comb += cat(self.r1).eq(self.sig123)
-
- return m
-
-
-def testbench2(dut):
-
- sig123 = yield dut.sig123.eq(0x1000a0005)
-
- yield
-
- sig1 = yield dut.r1.sig1
- assert sig1 == 5
- sig2 = yield dut.r1.r2.sig2
- assert sig2 == 10
- sig3 = yield dut.r1.r3.sig3
- assert sig3 == 1
-
-
-
-######################################################################
-# Unit Tests
-######################################################################
-
-if __name__ == '__main__':
- print ("test 1")
- dut = RecordTest()
- run_simulation(dut, testbench(dut), vcd_name="test_record1.vcd")
- vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2])
- with open("test_record1.il", "w") as f:
- f.write(vl)
-
- print ("test 2")
- dut = RecordTest2()
- run_simulation(dut, testbench2(dut), vcd_name="test_record2.vcd")
- vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2])
- with open("test_record2.il", "w") as f:
- f.write(vl)
-
+++ /dev/null
-from nmigen import Signal, Cat, Const, Mux, Module
-
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
-from fpbase import MultiShiftRMerge
-
-class ReservationStationRow:
-
- def __init__(self, width, id_wid):
- """ Reservation Station row
-
- * width: bit-width of IEEE754. supported: 16, 32, 64
- * id_wid: an identifier to be passed through to the FunctionUnit
- """
- self.width = width
-
- self.in_a = Signal(width)
- self.in_b = Signal(width)
- self.id_wid = id_wid
- self.out_z = Signal(width)
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for ReservationStationRow
- """
- m = Module()
-
- return m
-
-
-if __name__ == "__main__":
- rs = ReservationStationRow(width=32, id_wid=Const(1,4))
- main(alu, ports=[rs.in_a, rs.in_b, rs.out_z]
-
- # works... but don't use, just do "python fname.py convert -t v"
- #print (verilog.convert(alu, ports=[
- # ports=alu.in_a.ports() + \
- # alu.in_b.ports() + \
- # alu.out_z.ports())
+++ /dev/null
-""" Pipeline API. For multi-input and multi-output variants, see multipipe.
-
- Associated development bugs:
- * http://bugs.libre-riscv.org/show_bug.cgi?id=64
- * http://bugs.libre-riscv.org/show_bug.cgi?id=57
-
- Important: see Stage API (stageapi.py) in combination with below
-
- RecordBasedStage:
- ----------------
-
- A convenience class that takes an input shape, output shape, a
- "processing" function and an optional "setup" function. Honestly
- though, there's not much more effort to just... create a class
- that returns a couple of Records (see ExampleAddRecordStage in
- examples).
-
- PassThroughStage:
- ----------------
-
- A convenience class that takes a single function as a parameter,
- that is chain-called to create the exact same input and output spec.
- It has a process() function that simply returns its input.
-
- Instances of this class are completely redundant if handed to
- StageChain, however when passed to UnbufferedPipeline they
- can be used to introduce a single clock delay.
-
- ControlBase:
- -----------
-
- The base class for pipelines. Contains previous and next ready/valid/data.
- Also has an extremely useful "connect" function that can be used to
- connect a chain of pipelines and present the exact same prev/next
- ready/valid/data API.
-
- Note: pipelines basically do not become pipelines as such until
- handed to a derivative of ControlBase. ControlBase itself is *not*
- strictly considered a pipeline class. Wishbone and AXI4 (master or
- slave) could be derived from ControlBase, for example.
- UnbufferedPipeline:
- ------------------
-
- A simple stalling clock-synchronised pipeline that has no buffering
- (unlike BufferedHandshake). Data flows on *every* clock cycle when
- the conditions are right (this is nominally when the input is valid
- and the output is ready).
-
- A stall anywhere along the line will result in a stall back-propagating
- down the entire chain. The BufferedHandshake by contrast will buffer
- incoming data, allowing previous stages one clock cycle's grace before
- also having to stall.
-
- An advantage of the UnbufferedPipeline over the Buffered one is
- that the amount of logic needed (number of gates) is greatly
- reduced (no second set of buffers basically)
-
- The disadvantage of the UnbufferedPipeline is that the valid/ready
- logic, if chained together, is *combinatorial*, resulting in
- progressively larger gate delay.
-
- PassThroughHandshake:
- ------------------
-
- A Control class that introduces a single clock delay, passing its
- data through unaltered. Unlike RegisterPipeline (which relies
- on UnbufferedPipeline and PassThroughStage) it handles ready/valid
- itself.
-
- RegisterPipeline:
- ----------------
-
- A convenience class that, because UnbufferedPipeline introduces a single
- clock delay, when its stage is a PassThroughStage, it results in a Pipeline
- stage that, duh, delays its (unmodified) input by one clock cycle.
-
- BufferedHandshake:
- ----------------
-
- nmigen implementation of buffered pipeline stage, based on zipcpu:
- https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
-
- this module requires quite a bit of thought to understand how it works
- (and why it is needed in the first place). reading the above is
- *strongly* recommended.
-
- unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
- the STB / ACK signals to raise and lower (on separate clocks) before
- data may proceeed (thus only allowing one piece of data to proceed
- on *ALTERNATE* cycles), the signalling here is a true pipeline
- where data will flow on *every* clock when the conditions are right.
-
- input acceptance conditions are when:
- * incoming previous-stage strobe (p.valid_i) is HIGH
- * outgoing previous-stage ready (p.ready_o) is LOW
-
- output transmission conditions are when:
- * outgoing next-stage strobe (n.valid_o) is HIGH
- * outgoing next-stage ready (n.ready_i) is LOW
-
- the tricky bit is when the input has valid data and the output is not
- ready to accept it. if it wasn't for the clock synchronisation, it
- would be possible to tell the input "hey don't send that data, we're
- not ready". unfortunately, it's not possible to "change the past":
- the previous stage *has no choice* but to pass on its data.
-
- therefore, the incoming data *must* be accepted - and stored: that
- is the responsibility / contract that this stage *must* accept.
- on the same clock, it's possible to tell the input that it must
- not send any more data. this is the "stall" condition.
-
- we now effectively have *two* possible pieces of data to "choose" from:
- the buffered data, and the incoming data. the decision as to which
- to process and output is based on whether we are in "stall" or not.
- i.e. when the next stage is no longer ready, the output comes from
- the buffer if a stall had previously occurred, otherwise it comes
- direct from processing the input.
-
- this allows us to respect a synchronous "travelling STB" with what
- dan calls a "buffered handshake".
-
- it's quite a complex state machine!
-
- SimpleHandshake
- ---------------
-
- Synchronised pipeline, Based on:
- https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v
-"""
-
-from nmigen import Signal, Mux, Module, Elaboratable
-from nmigen.cli import verilog, rtlil
-from nmigen.hdl.rec import Record
-
-from queue import Queue
-import inspect
-
-from iocontrol import (PrevControl, NextControl, Object, RecordObject)
-from stageapi import (_spec, StageCls, Stage, StageChain, StageHelper)
-import nmoperator
-
-
-class RecordBasedStage(Stage):
- """ convenience class which provides a Records-based layout.
- honestly it's a lot easier just to create a direct Records-based
- class (see ExampleAddRecordStage)
- """
- def __init__(self, in_shape, out_shape, processfn, setupfn=None):
- self.in_shape = in_shape
- self.out_shape = out_shape
- self.__process = processfn
- self.__setup = setupfn
- def ispec(self): return Record(self.in_shape)
- def ospec(self): return Record(self.out_shape)
- def process(seif, i): return self.__process(i)
- def setup(seif, m, i): return self.__setup(m, i)
-
-
-class PassThroughStage(StageCls):
- """ a pass-through stage with its input data spec identical to its output,
- and "passes through" its data from input to output (does nothing).
-
- use this basically to explicitly make any data spec Stage-compliant.
- (many APIs would potentially use a static "wrap" method in e.g.
- StageCls to achieve a similar effect)
- """
- def __init__(self, iospecfn): self.iospecfn = iospecfn
- def ispec(self): return self.iospecfn()
- def ospec(self): return self.iospecfn()
-
-
-class ControlBase(StageHelper, Elaboratable):
- """ Common functions for Pipeline API. Note: a "pipeline stage" only
- exists (conceptually) when a ControlBase derivative is handed
- a Stage (combinatorial block)
-
- NOTE: ControlBase derives from StageHelper, making it accidentally
- compliant with the Stage API. Using those functions directly
- *BYPASSES* a ControlBase instance ready/valid signalling, which
- clearly should not be done without a really, really good reason.
- """
- def __init__(self, stage=None, in_multi=None, stage_ctl=False):
- """ Base class containing ready/valid/data to previous and next stages
-
- * p: contains ready/valid to the previous stage
- * n: contains ready/valid to the next stage
-
- Except when calling Controlbase.connect(), user must also:
- * add data_i member to PrevControl (p) and
- * add data_o member to NextControl (n)
- Calling ControlBase._new_data is a good way to do that.
- """
- StageHelper.__init__(self, stage)
-
- # set up input and output IO ACK (prev/next ready/valid)
- self.p = PrevControl(in_multi, stage_ctl)
- self.n = NextControl(stage_ctl)
-
- # set up the input and output data
- if stage is not None:
- self._new_data("data")
-
- def _new_data(self, name):
- """ allocates new data_i and data_o
- """
- self.p.data_i, self.n.data_o = self.new_specs(name)
-
- @property
- def data_r(self):
- return self.process(self.p.data_i)
-
- def connect_to_next(self, nxt):
- """ helper function to connect to the next stage data/valid/ready.
- """
- return self.n.connect_to_next(nxt.p)
-
- def _connect_in(self, prev):
- """ internal helper function to connect stage to an input source.
- do not use to connect stage-to-stage!
- """
- return self.p._connect_in(prev.p)
-
- def _connect_out(self, nxt):
- """ internal helper function to connect stage to an output source.
- do not use to connect stage-to-stage!
- """
- return self.n._connect_out(nxt.n)
-
- def connect(self, pipechain):
- """ connects a chain (list) of Pipeline instances together and
- links them to this ControlBase instance:
-
- in <----> self <---> out
- | ^
- v |
- [pipe1, pipe2, pipe3, pipe4]
- | ^ | ^ | ^
- v | v | v |
- out---in out--in out---in
-
- Also takes care of allocating data_i/data_o, by looking up
- the data spec for each end of the pipechain. i.e It is NOT
- necessary to allocate self.p.data_i or self.n.data_o manually:
- this is handled AUTOMATICALLY, here.
-
- Basically this function is the direct equivalent of StageChain,
- except that unlike StageChain, the Pipeline logic is followed.
-
- Just as StageChain presents an object that conforms to the
- Stage API from a list of objects that also conform to the
- Stage API, an object that calls this Pipeline connect function
- has the exact same pipeline API as the list of pipline objects
- it is called with.
-
- Thus it becomes possible to build up larger chains recursively.
- More complex chains (multi-input, multi-output) will have to be
- done manually.
-
- Argument:
-
- * :pipechain: - a sequence of ControlBase-derived classes
- (must be one or more in length)
-
- Returns:
-
- * a list of eq assignments that will need to be added in
- an elaborate() to m.d.comb
- """
- assert len(pipechain) > 0, "pipechain must be non-zero length"
- assert self.stage is None, "do not use connect with a stage"
- eqs = [] # collated list of assignment statements
-
- # connect inter-chain
- for i in range(len(pipechain)-1):
- pipe1 = pipechain[i] # earlier
- pipe2 = pipechain[i+1] # later (by 1)
- eqs += pipe1.connect_to_next(pipe2) # earlier n to later p
-
- # connect front and back of chain to ourselves
- front = pipechain[0] # first in chain
- end = pipechain[-1] # last in chain
- self.set_specs(front, end) # sets up ispec/ospec functions
- self._new_data("chain") # NOTE: REPLACES existing data
- eqs += front._connect_in(self) # front p to our p
- eqs += end._connect_out(self) # end n to our n
-
- return eqs
-
- def set_input(self, i):
- """ helper function to set the input data (used in unit tests)
- """
- return nmoperator.eq(self.p.data_i, i)
-
- def __iter__(self):
- yield from self.p # yields ready/valid/data (data also gets yielded)
- yield from self.n # ditto
-
- def ports(self):
- return list(self)
-
- def elaborate(self, platform):
- """ handles case where stage has dynamic ready/valid functions
- """
- m = Module()
- m.submodules.p = self.p
- m.submodules.n = self.n
-
- self.setup(m, self.p.data_i)
-
- if not self.p.stage_ctl:
- return m
-
- # intercept the previous (outgoing) "ready", combine with stage ready
- m.d.comb += self.p.s_ready_o.eq(self.p._ready_o & self.stage.d_ready)
-
- # intercept the next (incoming) "ready" and combine it with data valid
- sdv = self.stage.d_valid(self.n.ready_i)
- m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv)
-
- return m
-
-
-class BufferedHandshake(ControlBase):
- """ buffered pipeline stage. data and strobe signals travel in sync.
- if ever the input is ready and the output is not, processed data
- is shunted in a temporary register.
-
- Argument: stage. see Stage API above
-
- stage-1 p.valid_i >>in stage n.valid_o out>> stage+1
- stage-1 p.ready_o <<out stage n.ready_i <<in stage+1
- stage-1 p.data_i >>in stage n.data_o out>> stage+1
- | |
- process --->----^
- | |
- +-- r_data ->-+
-
- input data p.data_i is read (only), is processed and goes into an
- intermediate result store [process()]. this is updated combinatorially.
-
- in a non-stall condition, the intermediate result will go into the
- output (update_output). however if ever there is a stall, it goes
- into r_data instead [update_buffer()].
-
- when the non-stall condition is released, r_data is the first
- to be transferred to the output [flush_buffer()], and the stall
- condition cleared.
-
- on the next cycle (as long as stall is not raised again) the
- input may begin to be processed and transferred directly to output.
- """
-
- def elaborate(self, platform):
- self.m = ControlBase.elaborate(self, platform)
-
- result = _spec(self.stage.ospec, "r_tmp")
- r_data = _spec(self.stage.ospec, "r_data")
-
- # establish some combinatorial temporaries
- o_n_validn = Signal(reset_less=True)
- n_ready_i = Signal(reset_less=True, name="n_i_rdy_data")
- nir_por = Signal(reset_less=True)
- nir_por_n = Signal(reset_less=True)
- p_valid_i = Signal(reset_less=True)
- nir_novn = Signal(reset_less=True)
- nirn_novn = Signal(reset_less=True)
- por_pivn = Signal(reset_less=True)
- npnn = Signal(reset_less=True)
- self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test),
- o_n_validn.eq(~self.n.valid_o),
- n_ready_i.eq(self.n.ready_i_test),
- nir_por.eq(n_ready_i & self.p._ready_o),
- nir_por_n.eq(n_ready_i & ~self.p._ready_o),
- nir_novn.eq(n_ready_i | o_n_validn),
- nirn_novn.eq(~n_ready_i & o_n_validn),
- npnn.eq(nir_por | nirn_novn),
- por_pivn.eq(self.p._ready_o & ~p_valid_i)
- ]
-
- # store result of processing in combinatorial temporary
- self.m.d.comb += nmoperator.eq(result, self.data_r)
-
- # if not in stall condition, update the temporary register
- with self.m.If(self.p.ready_o): # not stalled
- self.m.d.sync += nmoperator.eq(r_data, result) # update buffer
-
- # data pass-through conditions
- with self.m.If(npnn):
- data_o = self._postprocess(result) # XXX TBD, does nothing right now
- self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid
- nmoperator.eq(self.n.data_o, data_o), # update out
- ]
- # buffer flush conditions (NOTE: can override data passthru conditions)
- with self.m.If(nir_por_n): # not stalled
- # Flush the [already processed] buffer to the output port.
- data_o = self._postprocess(r_data) # XXX TBD, does nothing right now
- self.m.d.sync += [self.n.valid_o.eq(1), # reg empty
- nmoperator.eq(self.n.data_o, data_o), # flush
- ]
- # output ready conditions
- self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn)
-
- return self.m
-
-
-class SimpleHandshake(ControlBase):
- """ simple handshake control. data and strobe signals travel in sync.
- implements the protocol used by Wishbone and AXI4.
-
- Argument: stage. see Stage API above
-
- stage-1 p.valid_i >>in stage n.valid_o out>> stage+1
- stage-1 p.ready_o <<out stage n.ready_i <<in stage+1
- stage-1 p.data_i >>in stage n.data_o out>> stage+1
- | |
- +--process->--^
- Truth Table
-
- Inputs Temporary Output Data
- ------- ---------- ----- ----
- P P N N PiV& ~NiR& N P
- i o i o PoR NoV o o
- V R R V V R
-
- ------- - - - -
- 0 0 0 0 0 0 >0 0 reg
- 0 0 0 1 0 1 >1 0 reg
- 0 0 1 0 0 0 0 1 process(data_i)
- 0 0 1 1 0 0 0 1 process(data_i)
- ------- - - - -
- 0 1 0 0 0 0 >0 0 reg
- 0 1 0 1 0 1 >1 0 reg
- 0 1 1 0 0 0 0 1 process(data_i)
- 0 1 1 1 0 0 0 1 process(data_i)
- ------- - - - -
- 1 0 0 0 0 0 >0 0 reg
- 1 0 0 1 0 1 >1 0 reg
- 1 0 1 0 0 0 0 1 process(data_i)
- 1 0 1 1 0 0 0 1 process(data_i)
- ------- - - - -
- 1 1 0 0 1 0 1 0 process(data_i)
- 1 1 0 1 1 1 1 0 process(data_i)
- 1 1 1 0 1 0 1 1 process(data_i)
- 1 1 1 1 1 0 1 1 process(data_i)
- ------- - - - -
- """
-
- def elaborate(self, platform):
- self.m = m = ControlBase.elaborate(self, platform)
-
- r_busy = Signal()
- result = _spec(self.stage.ospec, "r_tmp")
-
- # establish some combinatorial temporaries
- n_ready_i = Signal(reset_less=True, name="n_i_rdy_data")
- p_valid_i_p_ready_o = Signal(reset_less=True)
- p_valid_i = Signal(reset_less=True)
- m.d.comb += [p_valid_i.eq(self.p.valid_i_test),
- n_ready_i.eq(self.n.ready_i_test),
- p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o),
- ]
-
- # store result of processing in combinatorial temporary
- m.d.comb += nmoperator.eq(result, self.data_r)
-
- # previous valid and ready
- with m.If(p_valid_i_p_ready_o):
- data_o = self._postprocess(result) # XXX TBD, does nothing right now
- m.d.sync += [r_busy.eq(1), # output valid
- nmoperator.eq(self.n.data_o, data_o), # update output
- ]
- # previous invalid or not ready, however next is accepting
- with m.Elif(n_ready_i):
- data_o = self._postprocess(result) # XXX TBD, does nothing right now
- m.d.sync += [nmoperator.eq(self.n.data_o, data_o)]
- # TODO: could still send data here (if there was any)
- #m.d.sync += self.n.valid_o.eq(0) # ...so set output invalid
- m.d.sync += r_busy.eq(0) # ...so set output invalid
-
- m.d.comb += self.n.valid_o.eq(r_busy)
- # if next is ready, so is previous
- m.d.comb += self.p._ready_o.eq(n_ready_i)
-
- return self.m
-
-
-class UnbufferedPipeline(ControlBase):
- """ A simple pipeline stage with single-clock synchronisation
- and two-way valid/ready synchronised signalling.
-
- Note that a stall in one stage will result in the entire pipeline
- chain stalling.
-
- Also that unlike BufferedHandshake, the valid/ready signalling does NOT
- travel synchronously with the data: the valid/ready signalling
- combines in a *combinatorial* fashion. Therefore, a long pipeline
- chain will lengthen propagation delays.
-
- Argument: stage. see Stage API, above
-
- stage-1 p.valid_i >>in stage n.valid_o out>> stage+1
- stage-1 p.ready_o <<out stage n.ready_i <<in stage+1
- stage-1 p.data_i >>in stage n.data_o out>> stage+1
- | |
- r_data result
- | |
- +--process ->-+
-
- Attributes:
- -----------
- p.data_i : StageInput, shaped according to ispec
- The pipeline input
- p.data_o : StageOutput, shaped according to ospec
- The pipeline output
- r_data : input_shape according to ispec
- A temporary (buffered) copy of a prior (valid) input.
- This is HELD if the output is not ready. It is updated
- SYNCHRONOUSLY.
- result: output_shape according to ospec
- The output of the combinatorial logic. it is updated
- COMBINATORIALLY (no clock dependence).
-
- Truth Table
-
- Inputs Temp Output Data
- ------- - ----- ----
- P P N N ~NiR& N P
- i o i o NoV o o
- V R R V V R
-
- ------- - - -
- 0 0 0 0 0 0 1 reg
- 0 0 0 1 1 1 0 reg
- 0 0 1 0 0 0 1 reg
- 0 0 1 1 0 0 1 reg
- ------- - - -
- 0 1 0 0 0 0 1 reg
- 0 1 0 1 1 1 0 reg
- 0 1 1 0 0 0 1 reg
- 0 1 1 1 0 0 1 reg
- ------- - - -
- 1 0 0 0 0 1 1 reg
- 1 0 0 1 1 1 0 reg
- 1 0 1 0 0 1 1 reg
- 1 0 1 1 0 1 1 reg
- ------- - - -
- 1 1 0 0 0 1 1 process(data_i)
- 1 1 0 1 1 1 0 process(data_i)
- 1 1 1 0 0 1 1 process(data_i)
- 1 1 1 1 0 1 1 process(data_i)
- ------- - - -
-
- Note: PoR is *NOT* involved in the above decision-making.
- """
-
- def elaborate(self, platform):
- self.m = m = ControlBase.elaborate(self, platform)
-
- data_valid = Signal() # is data valid or not
- r_data = _spec(self.stage.ospec, "r_tmp") # output type
-
- # some temporaries
- p_valid_i = Signal(reset_less=True)
- pv = Signal(reset_less=True)
- buf_full = Signal(reset_less=True)
- m.d.comb += p_valid_i.eq(self.p.valid_i_test)
- m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o)
- m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid)
-
- m.d.comb += self.n.valid_o.eq(data_valid)
- m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test)
- m.d.sync += data_valid.eq(p_valid_i | buf_full)
-
- with m.If(pv):
- m.d.sync += nmoperator.eq(r_data, self.data_r)
- data_o = self._postprocess(r_data) # XXX TBD, does nothing right now
- m.d.comb += nmoperator.eq(self.n.data_o, data_o)
-
- return self.m
-
-class UnbufferedPipeline2(ControlBase):
- """ A simple pipeline stage with single-clock synchronisation
- and two-way valid/ready synchronised signalling.
-
- Note that a stall in one stage will result in the entire pipeline
- chain stalling.
-
- Also that unlike BufferedHandshake, the valid/ready signalling does NOT
- travel synchronously with the data: the valid/ready signalling
- combines in a *combinatorial* fashion. Therefore, a long pipeline
- chain will lengthen propagation delays.
-
- Argument: stage. see Stage API, above
-
- stage-1 p.valid_i >>in stage n.valid_o out>> stage+1
- stage-1 p.ready_o <<out stage n.ready_i <<in stage+1
- stage-1 p.data_i >>in stage n.data_o out>> stage+1
- | | |
- +- process-> buf <-+
- Attributes:
- -----------
- p.data_i : StageInput, shaped according to ispec
- The pipeline input
- p.data_o : StageOutput, shaped according to ospec
- The pipeline output
- buf : output_shape according to ospec
- A temporary (buffered) copy of a valid output
- This is HELD if the output is not ready. It is updated
- SYNCHRONOUSLY.
-
- Inputs Temp Output Data
- ------- - -----
- P P N N ~NiR& N P (buf_full)
- i o i o NoV o o
- V R R V V R
-
- ------- - - -
- 0 0 0 0 0 0 1 process(data_i)
- 0 0 0 1 1 1 0 reg (odata, unchanged)
- 0 0 1 0 0 0 1 process(data_i)
- 0 0 1 1 0 0 1 process(data_i)
- ------- - - -
- 0 1 0 0 0 0 1 process(data_i)
- 0 1 0 1 1 1 0 reg (odata, unchanged)
- 0 1 1 0 0 0 1 process(data_i)
- 0 1 1 1 0 0 1 process(data_i)
- ------- - - -
- 1 0 0 0 0 1 1 process(data_i)
- 1 0 0 1 1 1 0 reg (odata, unchanged)
- 1 0 1 0 0 1 1 process(data_i)
- 1 0 1 1 0 1 1 process(data_i)
- ------- - - -
- 1 1 0 0 0 1 1 process(data_i)
- 1 1 0 1 1 1 0 reg (odata, unchanged)
- 1 1 1 0 0 1 1 process(data_i)
- 1 1 1 1 0 1 1 process(data_i)
- ------- - - -
-
- Note: PoR is *NOT* involved in the above decision-making.
- """
-
- def elaborate(self, platform):
- self.m = m = ControlBase.elaborate(self, platform)
-
- buf_full = Signal() # is data valid or not
- buf = _spec(self.stage.ospec, "r_tmp") # output type
-
- # some temporaries
- p_valid_i = Signal(reset_less=True)
- m.d.comb += p_valid_i.eq(self.p.valid_i_test)
-
- m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i)
- m.d.comb += self.p._ready_o.eq(~buf_full)
- m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o)
-
- data_o = Mux(buf_full, buf, self.data_r)
- data_o = self._postprocess(data_o) # XXX TBD, does nothing right now
- m.d.comb += nmoperator.eq(self.n.data_o, data_o)
- m.d.sync += nmoperator.eq(buf, self.n.data_o)
-
- return self.m
-
-
-class PassThroughHandshake(ControlBase):
- """ A control block that delays by one clock cycle.
-
- Inputs Temporary Output Data
- ------- ------------------ ----- ----
- P P N N PiV& PiV| NiR| pvr N P (pvr)
- i o i o PoR ~PoR ~NoV o o
- V R R V V R
-
- ------- - - - - - -
- 0 0 0 0 0 1 1 0 1 1 odata (unchanged)
- 0 0 0 1 0 1 0 0 1 0 odata (unchanged)
- 0 0 1 0 0 1 1 0 1 1 odata (unchanged)
- 0 0 1 1 0 1 1 0 1 1 odata (unchanged)
- ------- - - - - - -
- 0 1 0 0 0 0 1 0 0 1 odata (unchanged)
- 0 1 0 1 0 0 0 0 0 0 odata (unchanged)
- 0 1 1 0 0 0 1 0 0 1 odata (unchanged)
- 0 1 1 1 0 0 1 0 0 1 odata (unchanged)
- ------- - - - - - -
- 1 0 0 0 0 1 1 1 1 1 process(in)
- 1 0 0 1 0 1 0 0 1 0 odata (unchanged)
- 1 0 1 0 0 1 1 1 1 1 process(in)
- 1 0 1 1 0 1 1 1 1 1 process(in)
- ------- - - - - - -
- 1 1 0 0 1 1 1 1 1 1 process(in)
- 1 1 0 1 1 1 0 0 1 0 odata (unchanged)
- 1 1 1 0 1 1 1 1 1 1 process(in)
- 1 1 1 1 1 1 1 1 1 1 process(in)
- ------- - - - - - -
-
- """
-
- def elaborate(self, platform):
- self.m = m = ControlBase.elaborate(self, platform)
-
- r_data = _spec(self.stage.ospec, "r_tmp") # output type
-
- # temporaries
- p_valid_i = Signal(reset_less=True)
- pvr = Signal(reset_less=True)
- m.d.comb += p_valid_i.eq(self.p.valid_i_test)
- m.d.comb += pvr.eq(p_valid_i & self.p.ready_o)
-
- m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test)
- m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o)
-
- odata = Mux(pvr, self.data_r, r_data)
- m.d.sync += nmoperator.eq(r_data, odata)
- r_data = self._postprocess(r_data) # XXX TBD, does nothing right now
- m.d.comb += nmoperator.eq(self.n.data_o, r_data)
-
- return m
-
-
-class RegisterPipeline(UnbufferedPipeline):
- """ A pipeline stage that delays by one clock cycle, creating a
- sync'd latch out of data_o and valid_o as an indirect byproduct
- of using PassThroughStage
- """
- def __init__(self, iospecfn):
- UnbufferedPipeline.__init__(self, PassThroughStage(iospecfn))
-
-
-class FIFOControl(ControlBase):
- """ FIFO Control. Uses Queue to store data, coincidentally
- happens to have same valid/ready signalling as Stage API.
-
- data_i -> fifo.din -> FIFO -> fifo.dout -> data_o
- """
- def __init__(self, depth, stage, in_multi=None, stage_ctl=False,
- fwft=True, pipe=False):
- """ FIFO Control
-
- * :depth: number of entries in the FIFO
- * :stage: data processing block
- * :fwft: first word fall-thru mode (non-fwft introduces delay)
- * :pipe: specifies pipe mode.
-
- when fwft = True it indicates that transfers may occur
- combinatorially through stage processing in the same clock cycle.
- This requires that the Stage be a Moore FSM:
- https://en.wikipedia.org/wiki/Moore_machine
-
- when fwft = False it indicates that all output signals are
- produced only from internal registers or memory, i.e. that the
- Stage is a Mealy FSM:
- https://en.wikipedia.org/wiki/Mealy_machine
-
- data is processed (and located) as follows:
-
- self.p self.stage temp fn temp fn temp fp self.n
- data_i->process()->result->cat->din.FIFO.dout->cat(data_o)
-
- yes, really: cat produces a Cat() which can be assigned to.
- this is how the FIFO gets de-catted without needing a de-cat
- function
- """
- self.fwft = fwft
- self.pipe = pipe
- self.fdepth = depth
- ControlBase.__init__(self, stage, in_multi, stage_ctl)
-
- def elaborate(self, platform):
- self.m = m = ControlBase.elaborate(self, platform)
-
- # make a FIFO with a signal of equal width to the data_o.
- (fwidth, _) = nmoperator.shape(self.n.data_o)
- fifo = Queue(fwidth, self.fdepth, fwft=self.fwft, pipe=self.pipe)
- m.submodules.fifo = fifo
-
- def processfn(data_i):
- # store result of processing in combinatorial temporary
- result = _spec(self.stage.ospec, "r_temp")
- m.d.comb += nmoperator.eq(result, self.process(data_i))
- return nmoperator.cat(result)
-
- ## prev: make the FIFO (Queue object) "look" like a PrevControl...
- m.submodules.fp = fp = PrevControl()
- fp.valid_i, fp._ready_o, fp.data_i = fifo.we, fifo.writable, fifo.din
- m.d.comb += fp._connect_in(self.p, fn=processfn)
-
- # next: make the FIFO (Queue object) "look" like a NextControl...
- m.submodules.fn = fn = NextControl()
- fn.valid_o, fn.ready_i, fn.data_o = fifo.readable, fifo.re, fifo.dout
- connections = fn._connect_out(self.n, fn=nmoperator.cat)
-
- # ok ok so we can't just do the ready/valid eqs straight:
- # first 2 from connections are the ready/valid, 3rd is data.
- if self.fwft:
- m.d.comb += connections[:2] # combinatorial on next ready/valid
- else:
- m.d.sync += connections[:2] # non-fwft mode needs sync
- data_o = connections[2] # get the data
- data_o = self._postprocess(data_o) # XXX TBD, does nothing right now
- m.d.comb += data_o
-
- return m
-
-
-# aka "RegStage".
-class UnbufferedPipeline(FIFOControl):
- def __init__(self, stage, in_multi=None, stage_ctl=False):
- FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl,
- fwft=True, pipe=False)
-
-# aka "BreakReadyStage" XXX had to set fwft=True to get it to work
-class PassThroughHandshake(FIFOControl):
- def __init__(self, stage, in_multi=None, stage_ctl=False):
- FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl,
- fwft=True, pipe=True)
-
-# this is *probably* BufferedHandshake, although test #997 now succeeds.
-class BufferedHandshake(FIFOControl):
- def __init__(self, stage, in_multi=None, stage_ctl=False):
- FIFOControl.__init__(self, 2, stage, in_multi, stage_ctl,
- fwft=True, pipe=False)
-
-
-"""
-# this is *probably* SimpleHandshake (note: memory cell size=0)
-class SimpleHandshake(FIFOControl):
- def __init__(self, stage, in_multi=None, stage_ctl=False):
- FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl,
- fwft=True, pipe=False)
-"""
+++ /dev/null
-""" Stage API
-
- Associated development bugs:
- * http://bugs.libre-riscv.org/show_bug.cgi?id=64
- * http://bugs.libre-riscv.org/show_bug.cgi?id=57
-
- Stage API:
- ---------
-
- stage requires compliance with a strict API that may be
- implemented in several means, including as a static class.
-
- Stages do not HOLD data, and they definitely do not contain
- signalling (ready/valid). They do however specify the FORMAT
- of the incoming and outgoing data, and they provide a means to
- PROCESS that data (from incoming format to outgoing format).
-
- Stage Blocks really should be combinatorial blocks (Moore FSMs).
- It would be ok to have input come in from sync'd sources
- (clock-driven, Mealy FSMs) however by doing so they would no longer
- be deterministic, and chaining such blocks with such side-effects
- together could result in unexpected, unpredictable, unreproduceable
- behaviour.
-
- So generally to be avoided, then unless you know what you are doing.
- https://en.wikipedia.org/wiki/Moore_machine
- https://en.wikipedia.org/wiki/Mealy_machine
-
- the methods of a stage instance must be as follows:
-
- * ispec() - Input data format specification. Takes a bit of explaining.
- The requirements are: something that eventually derives from
- nmigen Value must be returned *OR* an iterator or iterable
- or sequence (list, tuple etc.) or generator must *yield*
- thing(s) that (eventually) derive from the nmigen Value class.
-
- Complex to state, very simple in practice:
- see test_buf_pipe.py for over 25 worked examples.
-
- * ospec() - Output data format specification.
- format requirements identical to ispec.
-
- * process(m, i) - Optional function for processing ispec-formatted data.
- returns a combinatorial block of a result that
- may be assigned to the output, by way of the "nmoperator.eq"
- function. Note that what is returned here can be
- extremely flexible. Even a dictionary can be returned
- as long as it has fields that match precisely with the
- Record into which its values is intended to be assigned.
- Again: see example unit tests for details.
-
- * setup(m, i) - Optional function for setting up submodules.
- may be used for more complex stages, to link
- the input (i) to submodules. must take responsibility
- for adding those submodules to the module (m).
- the submodules must be combinatorial blocks and
- must have their inputs and output linked combinatorially.
-
- Both StageCls (for use with non-static classes) and Stage (for use
- by static classes) are abstract classes from which, for convenience
- and as a courtesy to other developers, anything conforming to the
- Stage API may *choose* to derive. See Liskov Substitution Principle:
- https://en.wikipedia.org/wiki/Liskov_substitution_principle
-
- StageChain:
- ----------
-
- A useful combinatorial wrapper around stages that chains them together
- and then presents a Stage-API-conformant interface. By presenting
- the same API as the stages it wraps, it can clearly be used recursively.
-
- StageHelper:
- ----------
-
- A convenience wrapper around a Stage-API-compliant "thing" which
- complies with the Stage API and provides mandatory versions of
- all the optional bits.
-"""
-
-from abc import ABCMeta, abstractmethod
-import inspect
-
-import nmoperator
-
-
-def _spec(fn, name=None):
- """ useful function that determines if "fn" has an argument "name".
- if so, fn(name) is called otherwise fn() is called.
-
- means that ispec and ospec can be declared with *or without*
- a name argument. normally it would be necessary to have
- "ispec(name=None)" to achieve the same effect.
- """
- if name is None:
- return fn()
- varnames = dict(inspect.getmembers(fn.__code__))['co_varnames']
- if 'name' in varnames:
- return fn(name=name)
- return fn()
-
-
-class StageCls(metaclass=ABCMeta):
- """ Class-based "Stage" API. requires instantiation (after derivation)
-
- see "Stage API" above.. Note: python does *not* require derivation
- from this class. All that is required is that the pipelines *have*
- the functions listed in this class. Derivation from this class
- is therefore merely a "courtesy" to maintainers.
- """
- @abstractmethod
- def ispec(self): pass # REQUIRED
- @abstractmethod
- def ospec(self): pass # REQUIRED
- #@abstractmethod
- #def setup(self, m, i): pass # OPTIONAL
- #@abstractmethod
- #def process(self, i): pass # OPTIONAL
-
-
-class Stage(metaclass=ABCMeta):
- """ Static "Stage" API. does not require instantiation (after derivation)
-
- see "Stage API" above. Note: python does *not* require derivation
- from this class. All that is required is that the pipelines *have*
- the functions listed in this class. Derivation from this class
- is therefore merely a "courtesy" to maintainers.
- """
- @staticmethod
- @abstractmethod
- def ispec(): pass
-
- @staticmethod
- @abstractmethod
- def ospec(): pass
-
- #@staticmethod
- #@abstractmethod
- #def setup(m, i): pass
-
- #@staticmethod
- #@abstractmethod
- #def process(i): pass
-
-
-class StageHelper(Stage):
- """ a convenience wrapper around something that is Stage-API-compliant.
- (that "something" may be a static class, for example).
-
- StageHelper happens to also be compliant with the Stage API,
- it differs from the stage that it wraps in that all the "optional"
- functions are provided (hence the designation "convenience wrapper")
- """
- def __init__(self, stage):
- self.stage = stage
- self._ispecfn = None
- self._ospecfn = None
- if stage is not None:
- self.set_specs(self, self)
-
- def ospec(self, name):
- assert self._ospecfn is not None
- return _spec(self._ospecfn, name)
-
- def ispec(self, name):
- assert self._ispecfn is not None
- return _spec(self._ispecfn, name)
-
- def set_specs(self, p, n):
- """ sets up the ispecfn and ospecfn for getting input and output data
- """
- if hasattr(p, "stage"):
- p = p.stage
- if hasattr(n, "stage"):
- n = n.stage
- self._ispecfn = p.ispec
- self._ospecfn = n.ospec
-
- def new_specs(self, name):
- """ allocates new ispec and ospec pair
- """
- return (_spec(self.ispec, "%s_i" % name),
- _spec(self.ospec, "%s_o" % name))
-
- def process(self, i):
- if self.stage and hasattr(self.stage, "process"):
- return self.stage.process(i)
- return i
-
- def setup(self, m, i):
- if self.stage is not None and hasattr(self.stage, "setup"):
- self.stage.setup(m, i)
-
- def _postprocess(self, i): # XXX DISABLED
- return i # RETURNS INPUT
- if hasattr(self.stage, "postprocess"):
- return self.stage.postprocess(i)
- return i
-
-
-class StageChain(StageHelper):
- """ pass in a list of stages, and they will automatically be
- chained together via their input and output specs into a
- combinatorial chain, to create one giant combinatorial block.
-
- the end result basically conforms to the exact same Stage API.
-
- * input to this class will be the input of the first stage
- * output of first stage goes into input of second
- * output of second goes into input into third
- * ... (etc. etc.)
- * the output of this class will be the output of the last stage
-
- NOTE: whilst this is very similar to ControlBase.connect(), it is
- *really* important to appreciate that StageChain is pure
- combinatorial and bypasses (does not involve, at all, ready/valid
- signalling of any kind).
-
- ControlBase.connect on the other hand respects, connects, and uses
- ready/valid signalling.
-
- Arguments:
-
- * :chain: a chain of combinatorial blocks conforming to the Stage API
- NOTE: StageChain.ispec and ospect have to have something
- to return (beginning and end specs of the chain),
- therefore the chain argument must be non-zero length
-
- * :specallocate: if set, new input and output data will be allocated
- and connected (eq'd) to each chained Stage.
- in some cases if this is not done, the nmigen warning
- "driving from two sources, module is being flattened"
- will be issued.
-
- NOTE: do NOT use StageChain with combinatorial blocks that have
- side-effects (state-based / clock-based input) or conditional
- (inter-chain) dependencies, unless you really know what you are doing.
- """
- def __init__(self, chain, specallocate=False):
- assert len(chain) > 0, "stage chain must be non-zero length"
- self.chain = chain
- StageHelper.__init__(self, None)
- self.setup = self._sa_setup if specallocate else self._na_setup
- self.set_specs(self.chain[0], self.chain[-1])
-
- def _sa_setup(self, m, i):
- for (idx, c) in enumerate(self.chain):
- if hasattr(c, "setup"):
- c.setup(m, i) # stage may have some module stuff
- ofn = self.chain[idx].ospec # last assignment survives
- o = _spec(ofn, 'chainin%d' % idx)
- m.d.comb += nmoperator.eq(o, c.process(i)) # process input into "o"
- if idx == len(self.chain)-1:
- break
- ifn = self.chain[idx+1].ispec # new input on next loop
- i = _spec(ifn, 'chainin%d' % (idx+1))
- m.d.comb += nmoperator.eq(i, o) # assign to next input
- self.o = o
- return self.o # last loop is the output
-
- def _na_setup(self, m, i):
- for (idx, c) in enumerate(self.chain):
- if hasattr(c, "setup"):
- c.setup(m, i) # stage may have some module stuff
- i = o = c.process(i) # store input into "o"
- self.o = o
- return self.o # last loop is the output
-
- def process(self, i):
- return self.o # conform to Stage API: return last-loop output
-
-
+++ /dev/null
-from operator import add
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_add_experiment import FPADD
-
-from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_rs_case, check_rs_case, run_test,
- run_edge_cases, run_corner_cases)
-
-def testbench(dut):
- yield from check_rs_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1)
- yield from check_rs_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002)
- yield from check_rs_case(dut, 0x00000047, 0x80000048, 0x80000001)
- yield from check_rs_case(dut, 0x000116C2, 0x8001170A, 0x80000048)
- yield from check_rs_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33)
- yield from check_rs_case(dut, 0, 0, 0)
- yield from check_rs_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000)
- yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- #yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- yield from check_rs_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000)
- yield from check_rs_case(dut, 0x42540000, 0xC2540000, 0x00000000)
- yield from check_rs_case(dut, 0xC2540000, 0x42540000, 0x00000000)
- yield from check_rs_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000)
- yield from check_rs_case(dut, 0x82471f51, 0x243985f, 0x801c3790)
- yield from check_rs_case(dut, 0x40000000, 0xc0000000, 0x00000000)
- yield from check_rs_case(dut, 0x3F800000, 0x40000000, 0x40400000)
- yield from check_rs_case(dut, 0x40000000, 0x3F800000, 0x40400000)
- yield from check_rs_case(dut, 0x447A0000, 0x4488B000, 0x4502D800)
- yield from check_rs_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E)
- yield from check_rs_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E)
- yield from check_rs_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6)
- yield from check_rs_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6)
- yield from check_rs_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6)
- yield from check_rs_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6)
- yield from check_rs_case(dut, 0x7F800000, 0x00000000, 0x7F800000)
- yield from check_rs_case(dut, 0x00000000, 0x7F800000, 0x7F800000)
- yield from check_rs_case(dut, 0xFF800000, 0x00000000, 0xFF800000)
- yield from check_rs_case(dut, 0x00000000, 0xFF800000, 0xFF800000)
- yield from check_rs_case(dut, 0x7F800000, 0x7F800000, 0x7F800000)
- yield from check_rs_case(dut, 0xFF800000, 0xFF800000, 0xFF800000)
- yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- yield from check_rs_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7)
- yield from check_rs_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E)
- yield from check_rs_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE)
- yield from check_rs_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE)
- yield from check_rs_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE)
- yield from check_rs_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD)
- yield from check_rs_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF)
- yield from check_rs_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358)
- yield from check_rs_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358)
- yield from check_rs_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5)
- yield from check_rs_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5)
- #yield from check_rs_case(dut, 1, 0, 1)
- #yield from check_rs_case(dut, 1, 1, 1)
-
- count = 0
-
- #regression tests
- stimulus_a = [0x80000000, 0x22cb525a, 0x40000000, 0x83e73d5c,
- 0xbf9b1e94, 0x34082401,
- 0x5e8ef81, 0x5c75da81, 0x2b017]
- stimulus_b = [0xff800001, 0xadd79efa, 0xC0000000, 0x1c800000,
- 0xc038ed3a, 0xb328cd45,
- 0x114f3db, 0x2f642a39, 0xff3807ab]
- yield from run_test(dut, stimulus_a, stimulus_b, add, get_rs_case)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, add, get_rs_case)
- yield from run_edge_cases(dut, count, add, get_rs_case)
-
-if __name__ == '__main__':
- dut = FPADD(width=32, id_wid=5, single_cycle=True)
- run_simulation(dut, testbench(dut), vcd_name="test_add.vcd")
-
+++ /dev/null
-from operator import add
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_add_experiment import FPADD
-
-from unit_test_half import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_case, check_case, run_test,
- run_edge_cases, run_corner_cases)
-
-def testbench(dut):
- #yield from check_case(dut, 0x7800, 0xff6f, 0xff6f)
- #yield from check_case(dut, 0x0000, 0x7c32, 0x7e32)
- #yield from check_case(dut, 0x0000, 0x7da9, 0x7fa9)
- #yield from check_case(dut, 0x0000, 0x7ea0, 0x7ea0)
- #yield from check_case(dut, 0x7c9a, 0x8000, 0x7e9a)
- #yield from check_case(dut, 0x7d5e, 0x0000, 0x7f5e)
- #yield from check_case(dut, 0x8000, 0x7c8c, 0x7e8c)
- #yield from check_case(dut, 0x8000, 0xfc55, 0xfe55)
- #yield from check_case(dut, 0x8000, 0x7e1a, 0x7e1a)
-
- #yield from check_case(dut, 0x8000, 0xfc01, 0x7e00)
- yield from check_case(dut, 0xfc00, 0x7c00, 0x7e00)
- yield from check_case(dut, 0x8000, 0, 0)
- yield from check_case(dut, 0, 0, 0)
-
- count = 0
-
- #regression tests
- stimulus_a = [ 0x8000, 0x8000 ]
- stimulus_b = [ 0x0000, 0xfc01 ]
- yield from run_test(dut, stimulus_a, stimulus_b, add)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, add)
- yield from run_edge_cases(dut, count, add)
-
-if __name__ == '__main__':
- dut = FPADD(width=16, single_cycle=True)
- run_simulation(dut, testbench(dut), vcd_name="test_add16.vcd")
-
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-from operator import add
-
-from nmigen_add_experiment import FPADD
-
-import sys
-import atexit
-from random import randint
-from random import seed
-
-from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_case, check_case, run_test,
- run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
- yield from check_case(dut, 0, 0, 0)
- yield from check_case(dut, 0x3FF0000000000000, 0x4000000000000000,
- 0x4008000000000000)
- yield from check_case(dut, 0x4000000000000000, 0x3FF0000000000000,
- 0x4008000000000000)
- yield from check_case(dut, 0x4056C00000000000, 0x4042800000000000,
- 0x4060000000000000)
- yield from check_case(dut, 0x4056C00000000000, 0x4042EA3D70A3D70A,
- 0x40601A8F5C28F5C2)
-
- count = 0
-
- #regression tests
- stimulus_a = [0x3ff00000000000c5, 0xff80000000000000]
- stimulus_b = [0xbd28a404211fb72b, 0x7f80000000000000]
- yield from run_test(dut, stimulus_a, stimulus_b, add)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, add)
- yield from run_edge_cases(dut, count, add)
-
-
-if __name__ == '__main__':
- dut = FPADD(width=64, single_cycle=False)
- run_simulation(dut, testbench(dut), vcd_name="test_add64.vcd")
-
+++ /dev/null
-from random import randint
-from operator import add
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_add_experiment import FPADDBase, FPADDBaseMod
-
-def get_case(dut, a, b, mid):
- yield dut.in_mid.eq(mid)
- yield dut.in_a.eq(a)
- yield dut.in_b.eq(b)
- yield dut.in_t.stb.eq(1)
- yield
- yield
- yield
- yield
- ack = (yield dut.in_t.ack)
- assert ack == 0
-
- yield dut.in_t.stb.eq(0)
-
- yield dut.out_z.ack.eq(1)
-
- while True:
- out_z_stb = (yield dut.out_z.stb)
- if not out_z_stb:
- yield
- continue
- out_z = yield dut.out_z.v
- out_mid = yield dut.out_mid
- yield dut.out_z.ack.eq(0)
- yield
- break
-
- return out_z, out_mid
-
-def check_case(dut, a, b, z, mid=None):
- if mid is None:
- mid = randint(0, 6)
- out_z, out_mid = yield from get_case(dut, a, b, mid)
- assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
- assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid)
-
-
-
-def testbench(dut):
- yield from check_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1)
- yield from check_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002)
- yield from check_case(dut, 0x00000047, 0x80000048, 0x80000001)
- yield from check_case(dut, 0x000116C2, 0x8001170A, 0x80000048)
- yield from check_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33)
- yield from check_case(dut, 0, 0, 0)
- yield from check_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000)
- yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- #yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- yield from check_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000)
- yield from check_case(dut, 0x42540000, 0xC2540000, 0x00000000)
- yield from check_case(dut, 0xC2540000, 0x42540000, 0x00000000)
- yield from check_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000)
- yield from check_case(dut, 0x82471f51, 0x243985f, 0x801c3790)
- yield from check_case(dut, 0x40000000, 0xc0000000, 0x00000000)
- yield from check_case(dut, 0x3F800000, 0x40000000, 0x40400000)
- yield from check_case(dut, 0x40000000, 0x3F800000, 0x40400000)
- yield from check_case(dut, 0x447A0000, 0x4488B000, 0x4502D800)
- yield from check_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E)
- yield from check_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E)
- yield from check_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6)
- yield from check_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6)
- yield from check_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6)
- yield from check_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6)
- yield from check_case(dut, 0x7F800000, 0x00000000, 0x7F800000)
- yield from check_case(dut, 0x00000000, 0x7F800000, 0x7F800000)
- yield from check_case(dut, 0xFF800000, 0x00000000, 0xFF800000)
- yield from check_case(dut, 0x00000000, 0xFF800000, 0xFF800000)
- yield from check_case(dut, 0x7F800000, 0x7F800000, 0x7F800000)
- yield from check_case(dut, 0xFF800000, 0xFF800000, 0xFF800000)
- yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- yield from check_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7)
- yield from check_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E)
- yield from check_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE)
- yield from check_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE)
- yield from check_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE)
- yield from check_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD)
- yield from check_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF)
- yield from check_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358)
- yield from check_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358)
- yield from check_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5)
- yield from check_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5)
-
-if __name__ == '__main__':
- dut = FPADDBaseMod(width=32, id_wid=5, single_cycle=True)
- run_simulation(dut, testbench(dut), vcd_name="test_add.vcd")
-
+++ /dev/null
-""" Unit tests for Buffered and Unbuffered pipelines
-
- contains useful worked examples of how to use the Pipeline API,
- including:
-
- * Combinatorial Stage "Chaining"
- * class-based data stages
- * nmigen module-based data stages
- * special nmigen module-based data stage, where the stage *is* the module
- * Record-based data stages
- * static-class data stages
- * multi-stage pipelines (and how to connect them)
- * how to *use* the pipelines (see Test5) - how to get data in and out
-
-"""
-
-from nmigen import Module, Signal, Mux, Const, Elaboratable
-from nmigen.hdl.rec import Record
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd
-from example_buf_pipe import ExamplePipeline, UnbufferedPipeline
-from example_buf_pipe import ExampleStageCls
-from example_buf_pipe import PrevControl, NextControl, BufferedHandshake
-from example_buf_pipe import StageChain, ControlBase, StageCls
-from singlepipe import UnbufferedPipeline2
-from singlepipe import SimpleHandshake
-from singlepipe import PassThroughHandshake
-from singlepipe import PassThroughStage
-from singlepipe import FIFOControl
-from singlepipe import RecordObject
-
-from random import randint, seed
-
-#seed(4)
-
-
-def check_o_n_valid(dut, val):
- o_n_valid = yield dut.n.valid_o
- assert o_n_valid == val
-
-def check_o_n_valid2(dut, val):
- o_n_valid = yield dut.n.valid_o
- assert o_n_valid == val
-
-
-def tbench(dut):
- #yield dut.i_p_rst.eq(1)
- yield dut.n.ready_i.eq(0)
- #yield dut.p.ready_o.eq(0)
- yield
- yield
- #yield dut.i_p_rst.eq(0)
- yield dut.n.ready_i.eq(1)
- yield dut.p.data_i.eq(5)
- yield dut.p.valid_i.eq(1)
- yield
-
- yield dut.p.data_i.eq(7)
- yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed
- yield
- yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt
-
- yield dut.p.data_i.eq(2)
- yield
- yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready)
- yield dut.p.data_i.eq(9)
- yield
- yield dut.p.valid_i.eq(0)
- yield dut.p.data_i.eq(12)
- yield
- yield dut.p.data_i.eq(32)
- yield dut.n.ready_i.eq(1)
- yield
- yield from check_o_n_valid(dut, 1) # buffer still needs to output
- yield
- yield from check_o_n_valid(dut, 1) # buffer still needs to output
- yield
- yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done.
- yield
-
-
-def tbench2(dut):
- #yield dut.p.i_rst.eq(1)
- yield dut.n.ready_i.eq(0)
- #yield dut.p.ready_o.eq(0)
- yield
- yield
- #yield dut.p.i_rst.eq(0)
- yield dut.n.ready_i.eq(1)
- yield dut.p.data_i.eq(5)
- yield dut.p.valid_i.eq(1)
- yield
-
- yield dut.p.data_i.eq(7)
- yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks
- yield
- yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks
-
- yield dut.p.data_i.eq(2)
- yield
- yield from check_o_n_valid2(dut, 1) # ok *now* i_p_valid effect is felt
- yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready)
- yield dut.p.data_i.eq(9)
- yield
- yield dut.p.valid_i.eq(0)
- yield dut.p.data_i.eq(12)
- yield
- yield dut.p.data_i.eq(32)
- yield dut.n.ready_i.eq(1)
- yield
- yield from check_o_n_valid2(dut, 1) # buffer still needs to output
- yield
- yield from check_o_n_valid2(dut, 1) # buffer still needs to output
- yield
- yield from check_o_n_valid2(dut, 1) # buffer still needs to output
- yield
- yield from check_o_n_valid2(dut, 0) # buffer outputted, *now* we're done.
- yield
- yield
- yield
-
-
-class Test3:
- def __init__(self, dut, resultfn):
- self.dut = dut
- self.resultfn = resultfn
- self.data = []
- for i in range(num_tests):
- #data.append(randint(0, 1<<16-1))
- self.data.append(i+1)
- self.i = 0
- self.o = 0
-
- def send(self):
- while self.o != len(self.data):
- send_range = randint(0, 3)
- for j in range(randint(1,10)):
- if send_range == 0:
- send = True
- else:
- send = randint(0, send_range) != 0
- o_p_ready = yield self.dut.p.ready_o
- if not o_p_ready:
- yield
- continue
- if send and self.i != len(self.data):
- yield self.dut.p.valid_i.eq(1)
- yield self.dut.p.data_i.eq(self.data[self.i])
- self.i += 1
- else:
- yield self.dut.p.valid_i.eq(0)
- yield
-
- def rcv(self):
- while self.o != len(self.data):
- stall_range = randint(0, 3)
- for j in range(randint(1,10)):
- stall = randint(0, stall_range) != 0
- yield self.dut.n.ready_i.eq(stall)
- yield
- o_n_valid = yield self.dut.n.valid_o
- i_n_ready = yield self.dut.n.ready_i_test
- if not o_n_valid or not i_n_ready:
- continue
- data_o = yield self.dut.n.data_o
- self.resultfn(data_o, self.data[self.o], self.i, self.o)
- self.o += 1
- if self.o == len(self.data):
- break
-
-def resultfn_3(data_o, expected, i, o):
- assert data_o == expected + 1, \
- "%d-%d data %x not match %x\n" \
- % (i, o, data_o, expected)
-
-def data_placeholder():
- data = []
- for i in range(num_tests):
- d = PlaceHolder()
- d.src1 = randint(0, 1<<16-1)
- d.src2 = randint(0, 1<<16-1)
- data.append(d)
- return data
-
-def data_dict():
- data = []
- for i in range(num_tests):
- data.append({'src1': randint(0, 1<<16-1),
- 'src2': randint(0, 1<<16-1)})
- return data
-
-
-class Test5:
- def __init__(self, dut, resultfn, data=None, stage_ctl=False):
- self.dut = dut
- self.resultfn = resultfn
- self.stage_ctl = stage_ctl
- if data:
- self.data = data
- else:
- self.data = []
- for i in range(num_tests):
- self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1)))
- self.i = 0
- self.o = 0
-
- def send(self):
- while self.o != len(self.data):
- send_range = randint(0, 3)
- for j in range(randint(1,10)):
- if send_range == 0:
- send = True
- else:
- send = randint(0, send_range) != 0
- #send = True
- o_p_ready = yield self.dut.p.ready_o
- if not o_p_ready:
- yield
- continue
- if send and self.i != len(self.data):
- yield self.dut.p.valid_i.eq(1)
- for v in self.dut.set_input(self.data[self.i]):
- yield v
- self.i += 1
- else:
- yield self.dut.p.valid_i.eq(0)
- yield
-
- def rcv(self):
- while self.o != len(self.data):
- stall_range = randint(0, 3)
- for j in range(randint(1,10)):
- ready = randint(0, stall_range) != 0
- #ready = True
- yield self.dut.n.ready_i.eq(ready)
- yield
- o_n_valid = yield self.dut.n.valid_o
- i_n_ready = yield self.dut.n.ready_i_test
- if not o_n_valid or not i_n_ready:
- continue
- if isinstance(self.dut.n.data_o, Record):
- data_o = {}
- dod = self.dut.n.data_o
- for k, v in dod.fields.items():
- data_o[k] = yield v
- else:
- data_o = yield self.dut.n.data_o
- self.resultfn(data_o, self.data[self.o], self.i, self.o)
- self.o += 1
- if self.o == len(self.data):
- break
-
-def resultfn_5(data_o, expected, i, o):
- res = expected[0] + expected[1]
- assert data_o == res, \
- "%d-%d data %x not match %s\n" \
- % (i, o, data_o, repr(expected))
-
-def tbench4(dut):
- data = []
- for i in range(num_tests):
- #data.append(randint(0, 1<<16-1))
- data.append(i+1)
- i = 0
- o = 0
- while True:
- stall = randint(0, 3) != 0
- send = randint(0, 5) != 0
- yield dut.n.ready_i.eq(stall)
- o_p_ready = yield dut.p.ready_o
- if o_p_ready:
- if send and i != len(data):
- yield dut.p.valid_i.eq(1)
- yield dut.p.data_i.eq(data[i])
- i += 1
- else:
- yield dut.p.valid_i.eq(0)
- yield
- o_n_valid = yield dut.n.valid_o
- i_n_ready = yield dut.n.ready_i_test
- if o_n_valid and i_n_ready:
- data_o = yield dut.n.data_o
- assert data_o == data[o] + 2, "%d-%d data %x not match %x\n" \
- % (i, o, data_o, data[o])
- o += 1
- if o == len(data):
- break
-
-######################################################################
-# Test 2 and 4
-######################################################################
-
-class ExampleBufPipe2(ControlBase):
- """ Example of how to do chained pipeline stages.
- """
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- pipe1 = ExampleBufPipe()
- pipe2 = ExampleBufPipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-
-######################################################################
-# Test 9
-######################################################################
-
-class ExampleBufPipeChain2(BufferedHandshake):
- """ connects two stages together as a *single* combinatorial stage.
- """
- def __init__(self):
- stage1 = ExampleStageCls()
- stage2 = ExampleStageCls()
- combined = StageChain([stage1, stage2])
- BufferedHandshake.__init__(self, combined)
-
-
-def data_chain2():
- data = []
- for i in range(num_tests):
- data.append(randint(0, 1<<16-2))
- return data
-
-
-def resultfn_9(data_o, expected, i, o):
- res = expected + 2
- assert data_o == res, \
- "%d-%d received data %x not match expected %x\n" \
- % (i, o, data_o, res)
-
-
-######################################################################
-# Test 6 and 10
-######################################################################
-
-class SetLessThan(Elaboratable):
- def __init__(self, width, signed):
- self.m = Module()
- self.src1 = Signal((width, signed), name="src1")
- self.src2 = Signal((width, signed), name="src2")
- self.output = Signal(width, name="out")
-
- def elaborate(self, platform):
- self.m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0))
- return self.m
-
-
-class LTStage(StageCls):
- """ module-based stage example
- """
- def __init__(self):
- self.slt = SetLessThan(16, True)
-
- def ispec(self, name):
- return (Signal(16, name="%s_sig1" % name),
- Signal(16, name="%s_sig2" % name))
-
- def ospec(self, name):
- return Signal(16, "%s_out" % name)
-
- def setup(self, m, i):
- self.o = Signal(16)
- m.submodules.slt = self.slt
- m.d.comb += self.slt.src1.eq(i[0])
- m.d.comb += self.slt.src2.eq(i[1])
- m.d.comb += self.o.eq(self.slt.output)
-
- def process(self, i):
- return self.o
-
-
-class LTStageDerived(SetLessThan, StageCls):
- """ special version of a nmigen module where the module is also a stage
-
- shows that you don't actually need to combinatorially connect
- to the outputs, or add the module as a submodule: just return
- the module output parameter(s) from the Stage.process() function
- """
-
- def __init__(self):
- SetLessThan.__init__(self, 16, True)
-
- def ispec(self):
- return (Signal(16), Signal(16))
-
- def ospec(self):
- return Signal(16)
-
- def setup(self, m, i):
- m.submodules.slt = self
- m.d.comb += self.src1.eq(i[0])
- m.d.comb += self.src2.eq(i[1])
-
- def process(self, i):
- return self.output
-
-
-class ExampleLTPipeline(UnbufferedPipeline):
- """ an example of how to use the unbuffered pipeline.
- """
-
- def __init__(self):
- stage = LTStage()
- UnbufferedPipeline.__init__(self, stage)
-
-
-class ExampleLTBufferedPipeDerived(BufferedHandshake):
- """ an example of how to use the buffered pipeline.
- """
-
- def __init__(self):
- stage = LTStageDerived()
- BufferedHandshake.__init__(self, stage)
-
-
-def resultfn_6(data_o, expected, i, o):
- res = 1 if expected[0] < expected[1] else 0
- assert data_o == res, \
- "%d-%d data %x not match %s\n" \
- % (i, o, data_o, repr(expected))
-
-
-######################################################################
-# Test 7
-######################################################################
-
-class ExampleAddRecordStage(StageCls):
- """ example use of a Record
- """
-
- record_spec = [('src1', 16), ('src2', 16)]
- def ispec(self):
- """ returns a Record using the specification
- """
- return Record(self.record_spec)
-
- def ospec(self):
- return Record(self.record_spec)
-
- def process(self, i):
- """ process the input data, returning a dictionary with key names
- that exactly match the Record's attributes.
- """
- return {'src1': i.src1 + 1,
- 'src2': i.src2 + 1}
-
-######################################################################
-# Test 11
-######################################################################
-
-class ExampleAddRecordPlaceHolderStage(StageCls):
- """ example use of a Record, with a placeholder as the processing result
- """
-
- record_spec = [('src1', 16), ('src2', 16)]
- def ispec(self):
- """ returns a Record using the specification
- """
- return Record(self.record_spec)
-
- def ospec(self):
- return Record(self.record_spec)
-
- def process(self, i):
- """ process the input data, returning a PlaceHolder class instance
- with attributes that exactly match those of the Record.
- """
- o = PlaceHolder()
- o.src1 = i.src1 + 1
- o.src2 = i.src2 + 1
- return o
-
-
-# a dummy class that may have stuff assigned to instances once created
-class PlaceHolder: pass
-
-
-class ExampleAddRecordPipe(UnbufferedPipeline):
- """ an example of how to use the combinatorial pipeline.
- """
-
- def __init__(self):
- stage = ExampleAddRecordStage()
- UnbufferedPipeline.__init__(self, stage)
-
-
-def resultfn_7(data_o, expected, i, o):
- res = (expected['src1'] + 1, expected['src2'] + 1)
- assert data_o['src1'] == res[0] and data_o['src2'] == res[1], \
- "%d-%d data %s not match %s\n" \
- % (i, o, repr(data_o), repr(expected))
-
-
-class ExampleAddRecordPlaceHolderPipe(UnbufferedPipeline):
- """ an example of how to use the combinatorial pipeline.
- """
-
- def __init__(self):
- stage = ExampleAddRecordPlaceHolderStage()
- UnbufferedPipeline.__init__(self, stage)
-
-
-def resultfn_test11(data_o, expected, i, o):
- res1 = expected.src1 + 1
- res2 = expected.src2 + 1
- assert data_o['src1'] == res1 and data_o['src2'] == res2, \
- "%d-%d data %s not match %s\n" \
- % (i, o, repr(data_o), repr(expected))
-
-
-######################################################################
-# Test 8
-######################################################################
-
-
-class Example2OpClass:
- """ an example of a class used to store 2 operands.
- requires an eq function, to conform with the pipeline stage API
- """
-
- def __init__(self):
- self.op1 = Signal(16)
- self.op2 = Signal(16)
-
- def eq(self, i):
- return [self.op1.eq(i.op1), self.op2.eq(i.op2)]
-
-
-class ExampleAddClassStage(StageCls):
- """ an example of how to use the buffered pipeline, as a class instance
- """
-
- def ispec(self):
- """ returns an instance of an Example2OpClass.
- """
- return Example2OpClass()
-
- def ospec(self):
- """ returns an output signal which will happen to contain the sum
- of the two inputs
- """
- return Signal(16, name="add2_out")
-
- def process(self, i):
- """ process the input data (sums the values in the tuple) and returns it
- """
- return i.op1 + i.op2
-
-
-class ExampleBufPipeAddClass(BufferedHandshake):
- """ an example of how to use the buffered pipeline, using a class instance
- """
-
- def __init__(self):
- addstage = ExampleAddClassStage()
- BufferedHandshake.__init__(self, addstage)
-
-
-class TestInputAdd:
- """ the eq function, called by set_input, needs an incoming object
- that conforms to the Example2OpClass.eq function requirements
- easiest way to do that is to create a class that has the exact
- same member layout (self.op1, self.op2) as Example2OpClass
- """
- def __init__(self, op1, op2):
- self.op1 = op1
- self.op2 = op2
-
-
-def resultfn_8(data_o, expected, i, o):
- res = expected.op1 + expected.op2 # these are a TestInputAdd instance
- assert data_o == res, \
- "%d-%d data %s res %x not match %s\n" \
- % (i, o, repr(data_o), res, repr(expected))
-
-def data_2op():
- data = []
- for i in range(num_tests):
- data.append(TestInputAdd(randint(0, 1<<16-1), randint(0, 1<<16-1)))
- return data
-
-
-######################################################################
-# Test 12
-######################################################################
-
-class ExampleStageDelayCls(StageCls, Elaboratable):
- """ an example of how to use the buffered pipeline, in a static class
- fashion
- """
-
- def __init__(self, valid_trigger=2):
- self.count = Signal(2)
- self.valid_trigger = valid_trigger
-
- def ispec(self):
- return Signal(16, name="example_input_signal")
-
- def ospec(self):
- return Signal(16, name="example_output_signal")
-
- @property
- def d_ready(self):
- """ data is ready to be accepted when this is true
- """
- return (self.count == 1)# | (self.count == 3)
- return Const(1)
-
- def d_valid(self, ready_i):
- """ data is valid at output when this is true
- """
- return self.count == self.valid_trigger
- return Const(1)
-
- def process(self, i):
- """ process the input data and returns it (adds 1)
- """
- return i + 1
-
- def elaborate(self, platform):
- m = Module()
- m.d.sync += self.count.eq(self.count + 1)
- return m
-
-
-class ExampleBufDelayedPipe(BufferedHandshake):
-
- def __init__(self):
- stage = ExampleStageDelayCls(valid_trigger=2)
- BufferedHandshake.__init__(self, stage, stage_ctl=True)
-
- def elaborate(self, platform):
- m = BufferedHandshake.elaborate(self, platform)
- m.submodules.stage = self.stage
- return m
-
-
-def data_chain1():
- data = []
- for i in range(num_tests):
- data.append(1<<((i*3)%15))
- #data.append(randint(0, 1<<16-2))
- #print (hex(data[-1]))
- return data
-
-
-def resultfn_12(data_o, expected, i, o):
- res = expected + 1
- assert data_o == res, \
- "%d-%d data %x not match %x\n" \
- % (i, o, data_o, res)
-
-
-######################################################################
-# Test 13
-######################################################################
-
-class ExampleUnBufDelayedPipe(BufferedHandshake):
-
- def __init__(self):
- stage = ExampleStageDelayCls(valid_trigger=3)
- BufferedHandshake.__init__(self, stage, stage_ctl=True)
-
- def elaborate(self, platform):
- m = BufferedHandshake.elaborate(self, platform)
- m.submodules.stage = self.stage
- return m
-
-######################################################################
-# Test 15
-######################################################################
-
-class ExampleBufModeAdd1Pipe(SimpleHandshake):
-
- def __init__(self):
- stage = ExampleStageCls()
- SimpleHandshake.__init__(self, stage)
-
-
-######################################################################
-# Test 16
-######################################################################
-
-class ExampleBufModeUnBufPipe(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- pipe1 = ExampleBufModeAdd1Pipe()
- pipe2 = ExampleBufAdd1Pipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-######################################################################
-# Test 17
-######################################################################
-
-class ExampleUnBufAdd1Pipe2(UnbufferedPipeline2):
-
- def __init__(self):
- stage = ExampleStageCls()
- UnbufferedPipeline2.__init__(self, stage)
-
-
-######################################################################
-# Test 18
-######################################################################
-
-class PassThroughTest(PassThroughHandshake):
-
- def iospecfn(self):
- return Signal(16, "out")
-
- def __init__(self):
- stage = PassThroughStage(self.iospecfn)
- PassThroughHandshake.__init__(self, stage)
-
-def resultfn_identical(data_o, expected, i, o):
- res = expected
- assert data_o == res, \
- "%d-%d data %x not match %x\n" \
- % (i, o, data_o, res)
-
-
-######################################################################
-# Test 19
-######################################################################
-
-class ExamplePassAdd1Pipe(PassThroughHandshake):
-
- def __init__(self):
- stage = ExampleStageCls()
- PassThroughHandshake.__init__(self, stage)
-
-
-class ExampleBufPassThruPipe(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- # XXX currently fails: any other permutation works fine.
- # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok
- # also fails using UnbufferedPipeline as well
- pipe1 = ExampleBufModeAdd1Pipe()
- pipe2 = ExamplePassAdd1Pipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-
-######################################################################
-# Test 20
-######################################################################
-
-def iospecfn():
- return Signal(16, name="d_in")
-
-class FIFOTest16(FIFOControl):
-
- def __init__(self):
- stage = PassThroughStage(iospecfn)
- FIFOControl.__init__(self, 2, stage)
-
-
-######################################################################
-# Test 21
-######################################################################
-
-class ExampleFIFOPassThruPipe1(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- pipe1 = FIFOTest16()
- pipe2 = FIFOTest16()
- pipe3 = ExamplePassAdd1Pipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
- m.submodules.pipe3 = pipe3
-
- m.d.comb += self.connect([pipe1, pipe2, pipe3])
-
- return m
-
-
-######################################################################
-# Test 22
-######################################################################
-
-class Example2OpRecord(RecordObject):
- def __init__(self):
- RecordObject.__init__(self)
- self.op1 = Signal(16)
- self.op2 = Signal(16)
-
-
-class ExampleAddRecordObjectStage(StageCls):
-
- def ispec(self):
- """ returns an instance of an Example2OpRecord.
- """
- return Example2OpRecord()
-
- def ospec(self):
- """ returns an output signal which will happen to contain the sum
- of the two inputs
- """
- return Signal(16)
-
- def process(self, i):
- """ process the input data (sums the values in the tuple) and returns it
- """
- return i.op1 + i.op2
-
-
-class ExampleRecordHandshakeAddClass(SimpleHandshake):
-
- def __init__(self):
- addstage = ExampleAddRecordObjectStage()
- SimpleHandshake.__init__(self, stage=addstage)
-
-
-######################################################################
-# Test 23
-######################################################################
-
-def iospecfnrecord():
- return Example2OpRecord()
-
-class FIFOTestRecordControl(FIFOControl):
-
- def __init__(self):
- stage = PassThroughStage(iospecfnrecord)
- FIFOControl.__init__(self, 2, stage)
-
-
-class ExampleFIFORecordObjectPipe(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- pipe1 = FIFOTestRecordControl()
- pipe2 = ExampleRecordHandshakeAddClass()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-
-######################################################################
-# Test 24
-######################################################################
-
-class FIFOTestRecordAddStageControl(FIFOControl):
-
- def __init__(self):
- stage = ExampleAddRecordObjectStage()
- FIFOControl.__init__(self, 2, stage)
-
-
-
-######################################################################
-# Test 25
-######################################################################
-
-class FIFOTestAdd16(FIFOControl):
-
- def __init__(self):
- stage = ExampleStageCls()
- FIFOControl.__init__(self, 2, stage)
-
-
-class ExampleFIFOAdd2Pipe(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- pipe1 = FIFOTestAdd16()
- pipe2 = FIFOTestAdd16()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-
-######################################################################
-# Test 26
-######################################################################
-
-def iospecfn24():
- return (Signal(16, name="src1"), Signal(16, name="src2"))
-
-class FIFOTest2x16(FIFOControl):
-
- def __init__(self):
- stage = PassThroughStage(iospecfn2)
- FIFOControl.__init__(self, 2, stage)
-
-
-######################################################################
-# Test 997
-######################################################################
-
-class ExampleBufPassThruPipe2(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- # XXX currently fails: any other permutation works fine.
- # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok
- # also fails using UnbufferedPipeline as well
- #pipe1 = ExampleUnBufAdd1Pipe()
- #pipe2 = ExampleBufAdd1Pipe()
- pipe1 = ExampleBufAdd1Pipe()
- pipe2 = ExamplePassAdd1Pipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-
-######################################################################
-# Test 998
-######################################################################
-
-class ExampleBufPipe3(ControlBase):
- """ Example of how to do delayed pipeline, where the stage signals
- whether it is ready.
- """
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- pipe1 = ExampleBufDelayedPipe()
- pipe2 = ExampleBufPipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-######################################################################
-# Test 999 - XXX FAILS
-# http://bugs.libre-riscv.org/show_bug.cgi?id=57
-######################################################################
-
-class ExampleBufAdd1Pipe(BufferedHandshake):
-
- def __init__(self):
- stage = ExampleStageCls()
- BufferedHandshake.__init__(self, stage)
-
-
-class ExampleUnBufAdd1Pipe(UnbufferedPipeline):
-
- def __init__(self):
- stage = ExampleStageCls()
- UnbufferedPipeline.__init__(self, stage)
-
-
-class ExampleBufUnBufPipe(ControlBase):
-
- def elaborate(self, platform):
- m = ControlBase.elaborate(self, platform)
-
- # XXX currently fails: any other permutation works fine.
- # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok
- # also fails using UnbufferedPipeline as well
- #pipe1 = ExampleUnBufAdd1Pipe()
- #pipe2 = ExampleBufAdd1Pipe()
- pipe1 = ExampleBufAdd1Pipe()
- pipe2 = ExampleUnBufAdd1Pipe()
-
- m.submodules.pipe1 = pipe1
- m.submodules.pipe2 = pipe2
-
- m.d.comb += self.connect([pipe1, pipe2])
-
- return m
-
-
-######################################################################
-# Unit Tests
-######################################################################
-
-num_tests = 10
-
-if __name__ == '__main__':
- if False:
- print ("test 1")
- dut = ExampleBufPipe()
- run_simulation(dut, tbench(dut), vcd_name="test_bufpipe.vcd")
-
- print ("test 2")
- dut = ExampleBufPipe2()
- run_simulation(dut, tbench2(dut), vcd_name="test_bufpipe2.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufpipe2.il", "w") as f:
- f.write(vl)
-
-
- print ("test 3")
- dut = ExampleBufPipe()
- test = Test3(dut, resultfn_3)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd")
-
- print ("test 3.5")
- dut = ExamplePipeline()
- test = Test3(dut, resultfn_3)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd")
-
- print ("test 4")
- dut = ExampleBufPipe2()
- run_simulation(dut, tbench4(dut), vcd_name="test_bufpipe4.vcd")
-
- print ("test 5")
- dut = ExampleBufPipeAdd()
- test = Test5(dut, resultfn_5, stage_ctl=True)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd")
-
- print ("test 6")
- dut = ExampleLTPipeline()
- test = Test5(dut, resultfn_6)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd")
-
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- list(dut.p.data_i) + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_ltcomb_pipe.il", "w") as f:
- f.write(vl)
-
- print ("test 7")
- dut = ExampleAddRecordPipe()
- data=data_dict()
- test = Test5(dut, resultfn_7, data=data)
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o,
- dut.p.data_i.src1, dut.p.data_i.src2,
- dut.n.data_o.src1, dut.n.data_o.src2]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_recordcomb_pipe.il", "w") as f:
- f.write(vl)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd")
-
- print ("test 8")
- dut = ExampleBufPipeAddClass()
- data=data_2op()
- test = Test5(dut, resultfn_8, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe8.vcd")
-
- print ("test 9")
- dut = ExampleBufPipeChain2()
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufpipechain2.il", "w") as f:
- f.write(vl)
-
- data = data_chain2()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv],
- vcd_name="test_bufpipechain2.vcd")
-
- print ("test 10")
- dut = ExampleLTBufferedPipeDerived()
- test = Test5(dut, resultfn_6)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltbufpipe10.vcd")
- vl = rtlil.convert(dut, ports=ports)
- with open("test_ltbufpipe10.il", "w") as f:
- f.write(vl)
-
- print ("test 11")
- dut = ExampleAddRecordPlaceHolderPipe()
- data=data_placeholder()
- test = Test5(dut, resultfn_test11, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd")
-
-
- print ("test 12")
- dut = ExampleBufDelayedPipe()
- data = data_chain1()
- test = Test5(dut, resultfn_12, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe12.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufpipe12.il", "w") as f:
- f.write(vl)
-
- print ("test 13")
- dut = ExampleUnBufDelayedPipe()
- data = data_chain1()
- test = Test5(dut, resultfn_12, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe13.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_unbufpipe13.il", "w") as f:
- f.write(vl)
-
- print ("test 15")
- dut = ExampleBufModeAdd1Pipe()
- data = data_chain1()
- test = Test5(dut, resultfn_12, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf15.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufunbuf15.il", "w") as f:
- f.write(vl)
-
- print ("test 16")
- dut = ExampleBufModeUnBufPipe()
- data = data_chain1()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf16.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufunbuf16.il", "w") as f:
- f.write(vl)
-
- print ("test 17")
- dut = ExampleUnBufAdd1Pipe2()
- data = data_chain1()
- test = Test5(dut, resultfn_12, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe17.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_unbufpipe17.il", "w") as f:
- f.write(vl)
-
- print ("test 18")
- dut = PassThroughTest()
- data = data_chain1()
- test = Test5(dut, resultfn_identical, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_passthru18.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_passthru18.il", "w") as f:
- f.write(vl)
-
- print ("test 19")
- dut = ExampleBufPassThruPipe()
- data = data_chain1()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpass19.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufpass19.il", "w") as f:
- f.write(vl)
-
- print ("test 20")
- dut = FIFOTest16()
- data = data_chain1()
- test = Test5(dut, resultfn_identical, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifo20.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_fifo20.il", "w") as f:
- f.write(vl)
-
- print ("test 21")
- dut = ExampleFIFOPassThruPipe1()
- data = data_chain1()
- test = Test5(dut, resultfn_12, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifopass21.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_fifopass21.il", "w") as f:
- f.write(vl)
-
- print ("test 22")
- dut = ExampleRecordHandshakeAddClass()
- data=data_2op()
- test = Test5(dut, resultfn_8, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord22.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i.op1, dut.p.data_i.op2] + \
- [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_addrecord22.il", "w") as f:
- f.write(vl)
-
- print ("test 23")
- dut = ExampleFIFORecordObjectPipe()
- data=data_2op()
- test = Test5(dut, resultfn_8, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord23.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i.op1, dut.p.data_i.op2] + \
- [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_addrecord23.il", "w") as f:
- f.write(vl)
-
- print ("test 24")
- dut = FIFOTestRecordAddStageControl()
- data=data_2op()
- test = Test5(dut, resultfn_8, data=data)
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i.op1, dut.p.data_i.op2] + \
- [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_addrecord24.il", "w") as f:
- f.write(vl)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord24.vcd")
-
- print ("test 25")
- dut = ExampleFIFOAdd2Pipe()
- data = data_chain1()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_add2pipe25.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_add2pipe25.il", "w") as f:
- f.write(vl)
-
- print ("test 997")
- dut = ExampleBufPassThruPipe2()
- data = data_chain1()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpass997.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufpass997.il", "w") as f:
- f.write(vl)
-
- print ("test 998 (fails, bug)")
- dut = ExampleBufPipe3()
- data = data_chain1()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe14.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufpipe14.il", "w") as f:
- f.write(vl)
-
- print ("test 999 (expected to fail, which is a bug)")
- dut = ExampleBufUnBufPipe()
- data = data_chain1()
- test = Test5(dut, resultfn_9, data=data)
- run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf999.vcd")
- ports = [dut.p.valid_i, dut.n.ready_i,
- dut.n.valid_o, dut.p.ready_o] + \
- [dut.p.data_i] + [dut.n.data_o]
- vl = rtlil.convert(dut, ports=ports)
- with open("test_bufunbuf999.il", "w") as f:
- f.write(vl)
-
+++ /dev/null
-import sys
-from random import randint
-from random import seed
-from operator import truediv
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_div_experiment import FPDIV
-
-from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_case, check_case, run_test,
- run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
- yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000)
- yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000)
- yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000)
- yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000)
- yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
- yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
- yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
- yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
- yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
- yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
- yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
- yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
-
- count = 0
-
- #regression tests
- stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017]
- stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab]
- yield from run_test(dut, stimulus_a, stimulus_b, truediv, get_case)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, truediv, get_case)
- yield from run_edge_cases(dut, count, truediv, get_case)
-
-
-if __name__ == '__main__':
- dut = FPDIV(width=32)
- run_simulation(dut, testbench(dut), vcd_name="test_div.vcd")
-
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_div_experiment import FPDIV
-
-class ORGate:
- def __init__(self):
- self.a = Signal()
- self.b = Signal()
- self.x = Signal()
-
- def elaborate(self, platform=None):
-
- m = Module()
- m.d.comb += self.x.eq(self.a | self.b)
-
- return m
-
-def check_case(dut, a, b, z):
- yield dut.in_a.v.eq(a)
- yield dut.in_a.stb.eq(1)
- yield
- yield
- a_ack = (yield dut.in_a.ack)
- assert a_ack == 0
- yield dut.in_b.v.eq(b)
- yield dut.in_b.stb.eq(1)
- b_ack = (yield dut.in_b.ack)
- assert b_ack == 0
-
- while True:
- yield
- out_z_stb = (yield dut.out_z.stb)
- if not out_z_stb:
- continue
- yield dut.in_a.stb.eq(0)
- yield dut.in_b.stb.eq(0)
- yield dut.out_z.ack.eq(1)
- yield
- yield dut.out_z.ack.eq(0)
- yield
- yield
- break
-
- out_z = yield dut.out_z.v
- assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
-
-def testbench(dut):
- yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000,
- 0x4008000000000000)
- yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000,
- 0x3FD5555555555555)
-
- if False:
- yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
- yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
- yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
- yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
- yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
- yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
- yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
- yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
-
-if __name__ == '__main__':
- dut = FPDIV(width=64)
- run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd")
-
+++ /dev/null
-from sfpy import Float32
-from nmigen.compat.sim import run_simulation
-from dual_add_experiment import ALU
-
-
-def get_case(dut, a, b, c):
- yield dut.a.v.eq(a)
- yield dut.a.stb.eq(1)
- yield
- yield
- a_ack = (yield dut.a.ack)
- assert a_ack == 0
-
- yield dut.a.stb.eq(0)
-
- yield dut.b.v.eq(b)
- yield dut.b.stb.eq(1)
- yield
- yield
- b_ack = (yield dut.b.ack)
- assert b_ack == 0
-
- yield dut.b.stb.eq(0)
-
- yield dut.c.v.eq(c)
- yield dut.c.stb.eq(1)
- yield
- yield
- c_ack = (yield dut.c.ack)
- assert c_ack == 0
-
- yield dut.c.stb.eq(0)
-
- yield dut.z.ack.eq(1)
-
- while True:
- out_z_stb = (yield dut.z.stb)
- if not out_z_stb:
- yield
- continue
-
- out_z = yield dut.z.v
-
- yield dut.z.ack.eq(0)
- break
-
- return out_z
-
-def check_case(dut, a, b, c, z):
- out_z = yield from get_case(dut, a, b, c)
- assert out_z == z, "Output z 0x%x != 0x%x" % (out_z, z)
-
-def testbench(dut):
- yield from check_case(dut, 0, 0, 0, 0)
- yield from check_case(dut, 0x3F800000, 0x40000000, 0xc0000000, 0x3F800000)
-
-if __name__ == '__main__':
- dut = ALU(width=32)
- run_simulation(dut, testbench(dut), vcd_name="test_dual_add.vcd")
-
+++ /dev/null
-""" key strategic example showing how to do multi-input fan-in into a
- multi-stage pipeline, then multi-output fanout.
-
- the multiplex ID from the fan-in is passed in to the pipeline, preserved,
- and used as a routing ID on the fanout.
-"""
-
-from random import randint
-from math import log
-from nmigen import Module, Signal, Cat, Value
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from nmigen_add_experiment import (FPADDMuxInOut,)
-
-from sfpy import Float32
-
-class InputTest:
- def __init__(self, dut):
- self.dut = dut
- self.di = {}
- self.do = {}
- self.tlen = 10
- self.width = 32
- for mid in range(dut.num_rows):
- self.di[mid] = {}
- self.do[mid] = []
- for i in range(self.tlen):
- op1 = randint(0, (1<<self.width)-1)
- op2 = randint(0, (1<<self.width)-1)
- #op1 = 0x40900000
- #op2 = 0x40200000
- res = Float32(op1) + Float32(op2)
- self.di[mid][i] = (op1, op2)
- self.do[mid].append(res.bits)
-
- def send(self, mid):
- for i in range(self.tlen):
- op1, op2 = self.di[mid][i]
- rs = dut.p[mid]
- yield rs.valid_i.eq(1)
- yield rs.data_i.a.eq(op1)
- yield rs.data_i.b.eq(op2)
- yield rs.data_i.mid.eq(mid)
- yield
- o_p_ready = yield rs.ready_o
- while not o_p_ready:
- yield
- o_p_ready = yield rs.ready_o
-
- fop1 = Float32(op1)
- fop2 = Float32(op2)
- res = fop1 + fop2
- print ("send", mid, i, hex(op1), hex(op2), hex(res.bits),
- fop1, fop2, res)
-
- yield rs.valid_i.eq(0)
- # wait random period of time before queueing another value
- for i in range(randint(0, 3)):
- yield
-
- yield rs.valid_i.eq(0)
- yield
-
- print ("send ended", mid)
-
- ## wait random period of time before queueing another value
- #for i in range(randint(0, 3)):
- # yield
-
- #send_range = randint(0, 3)
- #if send_range == 0:
- # send = True
- #else:
- # send = randint(0, send_range) != 0
-
- def rcv(self, mid):
- while True:
- #stall_range = randint(0, 3)
- #for j in range(randint(1,10)):
- # stall = randint(0, stall_range) != 0
- # yield self.dut.n[0].ready_i.eq(stall)
- # yield
- n = self.dut.n[mid]
- yield n.ready_i.eq(1)
- yield
- o_n_valid = yield n.valid_o
- i_n_ready = yield n.ready_i
- if not o_n_valid or not i_n_ready:
- continue
-
- out_mid = yield n.data_o.mid
- out_z = yield n.data_o.z
-
- out_i = 0
-
- print ("recv", out_mid, hex(out_z), "expected",
- hex(self.do[mid][out_i] ))
-
- # see if this output has occurred already, delete it if it has
- assert mid == out_mid, "out_mid %d not correct %d" % (out_mid, mid)
- assert self.do[mid][out_i] == out_z
- del self.do[mid][out_i]
-
- # check if there's any more outputs
- if len(self.do[mid]) == 0:
- break
- print ("recv ended", mid)
-
-
-
-if __name__ == '__main__':
- dut = FPADDMuxInOut(32, 4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fpadd_pipe.il", "w") as f:
- f.write(vl)
- #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")
-
- test = InputTest(dut)
- run_simulation(dut, [test.rcv(1), test.rcv(0),
- test.rcv(3), test.rcv(2),
- test.send(0), test.send(1),
- test.send(3), test.send(2),
- ],
- vcd_name="test_fpadd_pipe.vcd")
-
+++ /dev/null
-from random import randint
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from fpbase import FPNum
-
-class FPNumModShiftMulti:
- def __init__(self, width):
- self.a = FPNum(width)
- self.ediff = Signal((self.a.e_width, True))
-
- def elaborate(self, platform=None):
-
- m = Module()
- #m.d.sync += self.a.decode(self.a.v)
- m.d.sync += self.a.shift_down_multi(self.ediff)
-
- return m
-
-def check_case(dut, width, e_width, m, e, i):
- yield dut.a.m.eq(m)
- yield dut.a.e.eq(e)
- yield dut.ediff.eq(i)
- yield
- yield
-
- out_m = yield dut.a.m
- out_e = yield dut.a.e
- ed = yield dut.ediff
- calc_e = (e + i)
- print (e, bin(m), out_e, calc_e, bin(out_m), i, ed)
-
- calc_m = ((m >> (i+1)) << 1) | (m & 1)
- for l in range(i):
- if m & (1<<(l+1)):
- calc_m |= 1
-
- assert out_e == calc_e, "Output e 0x%x != expected 0x%x" % (out_e, calc_e)
- assert out_m == calc_m, "Output m 0x%x != expected 0x%x" % (out_m, calc_m)
-
-def testbench(dut):
- m_width = dut.a.m_width
- e_width = dut.a.e_width
- e_max = dut.a.e_max
- for j in range(200):
- m = randint(0, (1<<m_width)-1)
- zeros = randint(0, 31)
- for i in range(zeros):
- m &= ~(1<<i)
- e = randint(-e_max, e_max)
- for i in range(32):
- yield from check_case(dut, m_width, e_width, m, e, i)
-
-if __name__ == '__main__':
- dut = FPNumModShiftMulti(width=32)
- run_simulation(dut, testbench(dut), vcd_name="test_multishift.vcd")
-
- #dut = MultiShiftModL(width=32)
- #run_simulation(dut, testbench(dut), vcd_name="test_multishift.vcd")
-
+++ /dev/null
-# IEEE Floating Point Divider (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Const, Cat, Elaboratable
-from nmigen.cli import main, verilog, rtlil
-from nmigen.compat.sim import run_simulation
-
-
-from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, FPBase, FPState
-from nmoperator import eq
-from singlepipe import SimpleHandshake, ControlBase
-from test_buf_pipe import data_chain2, Test5
-
-
-class FPDIV(FPBase, Elaboratable):
-
- def __init__(self, width):
- FPBase.__init__(self)
- self.width = width
-
- self.p = FPOpIn(width)
- self.n = FPOpOut(width)
-
- self.p.data_i = self.ispec()
- self.n.data_o = self.ospec()
-
- self.states = []
-
- def ispec(self):
- return Signal(self.width, name="a")
-
- def ospec(self):
- return Signal(self.width, name="z")
-
- def setup(self, m, i):
- m.d.comb += self.p.v.eq(i) # connect input
-
- def process(self, i):
- return self.n.v # return z output
-
- def add_state(self, state):
- self.states.append(state)
- return state
-
- def elaborate(self, platform=None):
- """ creates the HDL code-fragment for FPDiv
- """
- m = Module()
-
- # Latches
- a = FPNumIn(None, self.width, False)
- z = FPNumOut(self.width, False)
-
- m.submodules.p = self.p
- m.submodules.n = self.n
- m.submodules.a = a
- m.submodules.z = z
-
- m.d.comb += a.v.eq(self.p.v)
-
- with m.FSM() as fsm:
-
- # ******
- # gets operand a
-
- with m.State("get_a"):
- res = self.get_op(m, self.p, a, "add_1")
- m.d.sync += eq([a, self.p.ready_o], res)
-
- with m.State("add_1"):
- m.next = "pack"
- m.d.sync += [
- z.s.eq(a.s), # sign
- z.e.eq(a.e), # exponent
- z.m.eq(a.m + 1), # mantissa
- ]
-
- # ******
- # pack stage
-
- with m.State("pack"):
- self.pack(m, z, "put_z")
-
- # ******
- # put_z stage
-
- with m.State("put_z"):
- self.put_z(m, z, self.n, "get_a")
-
- return m
-
-class FPDIVPipe(ControlBase):
-
- def __init__(self, width):
- self.width = width
- self.fpdiv = FPDIV(width=width)
- ControlBase.__init__(self, self.fpdiv)
-
- def elaborate(self, platform):
- self.m = m = ControlBase.elaborate(self, platform)
-
- m.submodules.fpdiv = self.fpdiv
-
- # see if connecting to stb/ack works
- m.d.comb += self.fpdiv.p._connect_in(self.p)
- m.d.comb += self.fpdiv.n._connect_out(self.n, do_data=False)
- m.d.comb += self.n.data_o.eq(self.data_r)
-
- return m
-
-def resultfn(data_o, expected, i, o):
- res = expected + 1
- assert data_o == res, \
- "%d-%d received data %x not match expected %x\n" \
- % (i, o, data_o, res)
-
-
-if __name__ == "__main__":
- dut = FPDIVPipe(width=16)
- data = data_chain2()
- ports = dut.ports()
- vl = rtlil.convert(dut, ports=ports)
- with open("test_fsm_experiment.il", "w") as f:
- f.write(vl)
- test = Test5(dut, resultfn, data=data)
- run_simulation(dut, [test.send, test.rcv],
- vcd_name="test_fsm_experiment.vcd")
-
+++ /dev/null
-""" key strategic example showing how to do multi-input fan-in into a
- multi-stage pipeline, then multi-output fanout.
-
- the multiplex ID from the fan-in is passed in to the pipeline, preserved,
- and used as a routing ID on the fanout.
-"""
-
-from random import randint
-from math import log
-from nmigen import Module, Signal, Cat, Value, Elaboratable
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from multipipe import CombMultiOutPipeline, CombMuxOutPipe
-from multipipe import PriorityCombMuxInPipe
-from singlepipe import SimpleHandshake, RecordObject, Object
-
-
-class PassData2(RecordObject):
- def __init__(self):
- RecordObject.__init__(self)
- self.mid = Signal(2, reset_less=True)
- self.idx = Signal(8, reset_less=True)
- self.data = Signal(16, reset_less=True)
-
-
-class PassData(Object):
- def __init__(self):
- Object.__init__(self)
- self.mid = Signal(2, reset_less=True)
- self.idx = Signal(8, reset_less=True)
- self.data = Signal(16, reset_less=True)
-
-
-
-class PassThroughStage:
- def ispec(self):
- return PassData()
- def ospec(self):
- return self.ispec() # same as ospec
-
- def process(self, i):
- return i # pass-through
-
-
-
-class PassThroughPipe(SimpleHandshake):
- def __init__(self):
- SimpleHandshake.__init__(self, PassThroughStage())
-
-
-class InputTest:
- def __init__(self, dut):
- self.dut = dut
- self.di = {}
- self.do = {}
- self.tlen = 100
- for mid in range(dut.num_rows):
- self.di[mid] = {}
- self.do[mid] = {}
- for i in range(self.tlen):
- self.di[mid][i] = randint(0, 255) + (mid<<8)
- self.do[mid][i] = self.di[mid][i]
-
- def send(self, mid):
- for i in range(self.tlen):
- op2 = self.di[mid][i]
- rs = dut.p[mid]
- yield rs.valid_i.eq(1)
- yield rs.data_i.data.eq(op2)
- yield rs.data_i.idx.eq(i)
- yield rs.data_i.mid.eq(mid)
- yield
- o_p_ready = yield rs.ready_o
- while not o_p_ready:
- yield
- o_p_ready = yield rs.ready_o
-
- print ("send", mid, i, hex(op2))
-
- yield rs.valid_i.eq(0)
- # wait random period of time before queueing another value
- for i in range(randint(0, 3)):
- yield
-
- yield rs.valid_i.eq(0)
- yield
-
- print ("send ended", mid)
-
- ## wait random period of time before queueing another value
- #for i in range(randint(0, 3)):
- # yield
-
- #send_range = randint(0, 3)
- #if send_range == 0:
- # send = True
- #else:
- # send = randint(0, send_range) != 0
-
- def rcv(self, mid):
- while True:
- #stall_range = randint(0, 3)
- #for j in range(randint(1,10)):
- # stall = randint(0, stall_range) != 0
- # yield self.dut.n[0].ready_i.eq(stall)
- # yield
- n = self.dut.n[mid]
- yield n.ready_i.eq(1)
- yield
- o_n_valid = yield n.valid_o
- i_n_ready = yield n.ready_i
- if not o_n_valid or not i_n_ready:
- continue
-
- out_mid = yield n.data_o.mid
- out_i = yield n.data_o.idx
- out_v = yield n.data_o.data
-
- print ("recv", out_mid, out_i, hex(out_v))
-
- # see if this output has occurred already, delete it if it has
- assert mid == out_mid, "out_mid %d not correct %d" % (out_mid, mid)
- assert out_i in self.do[mid], "out_i %d not in array %s" % \
- (out_i, repr(self.do[mid]))
- assert self.do[mid][out_i] == out_v # pass-through data
- del self.do[mid][out_i]
-
- # check if there's any more outputs
- if len(self.do[mid]) == 0:
- break
- print ("recv ended", mid)
-
-
-class TestPriorityMuxPipe(PriorityCombMuxInPipe):
- def __init__(self, num_rows):
- self.num_rows = num_rows
- stage = PassThroughStage()
- PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
-
-
-class OutputTest:
- def __init__(self, dut):
- self.dut = dut
- self.di = []
- self.do = {}
- self.tlen = 100
- for i in range(self.tlen * dut.num_rows):
- if i < dut.num_rows:
- mid = i
- else:
- mid = randint(0, dut.num_rows-1)
- data = randint(0, 255) + (mid<<8)
-
- def send(self):
- for i in range(self.tlen * dut.num_rows):
- op2 = self.di[i][0]
- mid = self.di[i][1]
- rs = dut.p
- yield rs.valid_i.eq(1)
- yield rs.data_i.data.eq(op2)
- yield rs.data_i.mid.eq(mid)
- yield
- o_p_ready = yield rs.ready_o
- while not o_p_ready:
- yield
- o_p_ready = yield rs.ready_o
-
- print ("send", mid, i, hex(op2))
-
- yield rs.valid_i.eq(0)
- # wait random period of time before queueing another value
- for i in range(randint(0, 3)):
- yield
-
- yield rs.valid_i.eq(0)
-
-
-class TestMuxOutPipe(CombMuxOutPipe):
- def __init__(self, num_rows):
- self.num_rows = num_rows
- stage = PassThroughStage()
- CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
-
-
-class TestInOutPipe(Elaboratable):
- def __init__(self, num_rows=4):
- self.num_rows = num_rows
- self.inpipe = TestPriorityMuxPipe(num_rows) # fan-in (combinatorial)
- self.pipe1 = PassThroughPipe() # stage 1 (clock-sync)
- self.pipe2 = PassThroughPipe() # stage 2 (clock-sync)
- self.outpipe = TestMuxOutPipe(num_rows) # fan-out (combinatorial)
-
- self.p = self.inpipe.p # kinda annoying,
- self.n = self.outpipe.n # use pipe in/out as this class in/out
- self._ports = self.inpipe.ports() + self.outpipe.ports()
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.inpipe = self.inpipe
- m.submodules.pipe1 = self.pipe1
- m.submodules.pipe2 = self.pipe2
- m.submodules.outpipe = self.outpipe
-
- m.d.comb += self.inpipe.n.connect_to_next(self.pipe1.p)
- m.d.comb += self.pipe1.connect_to_next(self.pipe2)
- m.d.comb += self.pipe2.connect_to_next(self.outpipe)
-
- return m
-
- def ports(self):
- return self._ports
-
-
-if __name__ == '__main__':
- dut = TestInOutPipe()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_inoutmux_pipe.il", "w") as f:
- f.write(vl)
- #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")
-
- test = InputTest(dut)
- run_simulation(dut, [test.rcv(1), test.rcv(0),
- test.rcv(3), test.rcv(2),
- test.send(0), test.send(1),
- test.send(3), test.send(2),
- ],
- vcd_name="test_inoutmux_pipe.vcd")
-
+++ /dev/null
-from random import randint
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from inputgroup import InputGroup
-
-
-def testbench(dut):
- stb = yield dut.out_op.stb
- assert stb == 0
- ack = yield dut.out_op.ack
- assert ack == 0
-
- # set row 1 input 0
- yield dut.rs[1].in_op[0].eq(5)
- yield dut.rs[1].stb.eq(0b01) # strobe indicate 1st op ready
- #yield dut.rs[1].ack.eq(1)
- yield
-
- # check row 1 output (should be inactive)
- decode = yield dut.rs[1].out_decode
- assert decode == 0
- if False:
- op0 = yield dut.rs[1].out_op[0]
- op1 = yield dut.rs[1].out_op[1]
- assert op0 == 0 and op1 == 0
-
- # output should be inactive
- out_stb = yield dut.out_op.stb
- assert out_stb == 1
-
- # set row 0 input 1
- yield dut.rs[1].in_op[1].eq(6)
- yield dut.rs[1].stb.eq(0b11) # strobe indicate both ops ready
-
- # set acknowledgement of output... takes 1 cycle to respond
- yield dut.out_op.ack.eq(1)
- yield
- yield dut.out_op.ack.eq(0) # clear ack on output
- yield dut.rs[1].stb.eq(0) # clear row 1 strobe
-
- # output strobe should be active, MID should be 0 until "ack" is set...
- out_stb = yield dut.out_op.stb
- assert out_stb == 1
- out_mid = yield dut.mid
- assert out_mid == 0
-
- # ... and output should not yet be passed through either
- op0 = yield dut.out_op.v[0]
- op1 = yield dut.out_op.v[1]
- assert op0 == 0 and op1 == 0
-
- # wait for out_op.ack to activate...
- yield dut.rs[1].stb.eq(0b00) # set row 1 strobes to zero
- yield
-
- # *now* output should be passed through
- op0 = yield dut.out_op.v[0]
- op1 = yield dut.out_op.v[1]
- assert op0 == 5 and op1 == 6
-
- # set row 2 input
- yield dut.rs[2].in_op[0].eq(3)
- yield dut.rs[2].in_op[1].eq(4)
- yield dut.rs[2].stb.eq(0b11) # strobe indicate 1st op ready
- yield dut.out_op.ack.eq(1) # set output ack
- yield
- yield dut.rs[2].stb.eq(0) # clear row 2 strobe
- yield dut.out_op.ack.eq(0) # set output ack
- yield
- op0 = yield dut.out_op.v[0]
- op1 = yield dut.out_op.v[1]
- assert op0 == 3 and op1 == 4, "op0 %d op1 %d" % (op0, op1)
- out_mid = yield dut.mid
- assert out_mid == 2
-
- # set row 0 and 3 input
- yield dut.rs[0].in_op[0].eq(9)
- yield dut.rs[0].in_op[1].eq(8)
- yield dut.rs[0].stb.eq(0b11) # strobe indicate 1st op ready
- yield dut.rs[3].in_op[0].eq(1)
- yield dut.rs[3].in_op[1].eq(2)
- yield dut.rs[3].stb.eq(0b11) # strobe indicate 1st op ready
-
- # set acknowledgement of output... takes 1 cycle to respond
- yield dut.out_op.ack.eq(1)
- yield
- yield dut.rs[0].stb.eq(0) # clear row 1 strobe
- yield
- out_mid = yield dut.mid
- assert out_mid == 0, "out mid %d" % out_mid
-
- yield
- yield dut.rs[3].stb.eq(0) # clear row 1 strobe
- yield dut.out_op.ack.eq(0) # clear ack on output
- yield
- out_mid = yield dut.mid
- assert out_mid == 3, "out mid %d" % out_mid
-
-
-class InputTest:
- def __init__(self, dut):
- self.dut = dut
- self.di = {}
- self.do = {}
- self.tlen = 10
- for mid in range(dut.num_rows):
- self.di[mid] = {}
- self.do[mid] = {}
- for i in range(self.tlen):
- self.di[mid][i] = randint(0, 100)
- self.do[mid][i] = self.di[mid][i]
-
- def send(self, mid):
- for i in range(self.tlen):
- op2 = self.di[mid][i]
- rs = dut.rs[mid]
- ack = yield rs.ack
- while not ack:
- yield
- ack = yield rs.ack
- yield rs.in_op[0].eq(i)
- yield rs.in_op[1].eq(op2)
- yield rs.stb.eq(0b11) # strobe indicate 1st op ready
- ack = yield rs.ack
- while ack:
- yield
- ack = yield rs.ack
- yield rs.stb.eq(0)
-
- # wait random period of time before queueing another value
- for i in range(randint(0, 8)):
- yield
-
- def recv(self):
- while True:
- stb = yield dut.out_op.stb
- yield dut.out_op.ack.eq(0)
- while not stb:
- yield dut.out_op.ack.eq(1)
- yield
- stb = yield dut.out_op.stb
-
- stb = yield dut.out_op.stb
- while stb:
- yield
- stb = yield dut.out_op.stb
- mid = yield dut.mid
- out_i = yield dut.out_op.v[0]
- out_v = yield dut.out_op.v[1]
-
- # see if this output has occurred already, delete it if it has
- assert out_i in self.do[mid]
- assert self.do[mid][out_i] == out_v
- del self.do[mid][out_i]
-
- # check if there's any more outputs
- zerolen = True
- for (k, v) in self.do.items():
- if v:
- zerolen = False
- if zerolen:
- break
-
-if __name__ == '__main__':
- dut = InputGroup(width=32)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_inputgroup.il", "w") as f:
- f.write(vl)
- run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")
-
- dut = InputGroup(width=16)
- test = InputTest(dut)
- run_simulation(dut, [test.send(3), test.send(2),
- test.send(1), test.send(0),
- test.recv()],
- vcd_name="test_inputgroup_parallel.vcd")
-
+++ /dev/null
-import sys
-from random import randint
-from random import seed
-from operator import mul
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from fmul import FPMUL
-
-from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_case, check_case, run_test,
- run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
- yield from check_case(dut, 0x40000000, 0x40000000, 0x40800000)
- yield from check_case(dut, 0x41400000, 0x40A00000, 0x42700000)
-
- count = 0
-
- #regression tests
- stimulus_a = [0xba57711a, 0xbf9b1e94, 0x34082401, 0x5e8ef81,
- 0x5c75da81, 0x2b017]
- stimulus_b = [0xee1818c5, 0xc038ed3a, 0xb328cd45, 0x114f3db,
- 0x2f642a39, 0xff3807ab]
- yield from run_test(dut, stimulus_a, stimulus_b, mul, get_case)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, mul, get_case)
- yield from run_edge_cases(dut, count, mul, get_case)
-
-
-if __name__ == '__main__':
- dut = FPMUL(width=32)
- run_simulation(dut, testbench(dut), vcd_name="test_mul.vcd")
-
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-from operator import mul
-
-from fmul import FPMUL
-
-import sys
-import atexit
-from random import randint
-from random import seed
-
-from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_case, check_case, run_test,
- run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
- yield from check_case(dut, 0, 0, 0)
-
- count = 0
-
- #regression tests
- stimulus_a = [0xff80000000000000, 0x3351099a0528e138]
- stimulus_b = [0x7f80000000000000, 0xd651a9a9986af2b5]
- yield from run_test(dut, stimulus_a, stimulus_b, mul)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, mul)
- yield from run_edge_cases(dut, count, mul)
-
-
-if __name__ == '__main__':
- dut = FPMUL(width=64)
- run_simulation(dut, testbench(dut), vcd_name="test_mul64.vcd")
-
+++ /dev/null
-from random import randint
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from fpbase import MultiShift, MultiShiftR, MultiShiftRMerge
-
-class MultiShiftModL:
- def __init__(self, width):
- self.ms = MultiShift(width)
- self.a = Signal(width)
- self.b = Signal(self.ms.smax)
- self.x = Signal(width)
-
- def elaborate(self, platform=None):
-
- m = Module()
- m.d.comb += self.x.eq(self.ms.lshift(self.a, self.b))
-
- return m
-
-class MultiShiftModR:
- def __init__(self, width):
- self.ms = MultiShift(width)
- self.a = Signal(width)
- self.b = Signal(self.ms.smax)
- self.x = Signal(width)
-
- def elaborate(self, platform=None):
-
- m = Module()
- m.d.comb += self.x.eq(self.ms.rshift(self.a, self.b))
-
- return m
-
-class MultiShiftModRMod:
- def __init__(self, width):
- self.ms = MultiShiftR(width)
- self.a = Signal(width)
- self.b = Signal(self.ms.smax)
- self.x = Signal(width)
-
- def elaborate(self, platform=None):
-
- m = Module()
- m.submodules += self.ms
- m.d.comb += self.ms.i.eq(self.a)
- m.d.comb += self.ms.s.eq(self.b)
- m.d.comb += self.x.eq(self.ms.o)
-
- return m
-
-class MultiShiftRMergeMod:
- def __init__(self, width):
- self.ms = MultiShiftRMerge(width)
- self.a = Signal(width)
- self.b = Signal(self.ms.smax)
- self.x = Signal(width)
-
- def elaborate(self, platform=None):
-
- m = Module()
- m.submodules += self.ms
- m.d.comb += self.ms.inp.eq(self.a)
- m.d.comb += self.ms.diff.eq(self.b)
- m.d.comb += self.x.eq(self.ms.m)
-
- return m
-
-
-def check_case(dut, width, a, b):
- yield dut.a.eq(a)
- yield dut.b.eq(b)
- yield
-
- x = (a << b) & ((1<<width)-1)
-
- out_x = yield dut.x
- assert out_x == x, "Output x 0x%x not equal to expected 0x%x" % (out_x, x)
-
-def check_caser(dut, width, a, b):
- yield dut.a.eq(a)
- yield dut.b.eq(b)
- yield
-
- x = (a >> b) & ((1<<width)-1)
-
- out_x = yield dut.x
- assert out_x == x, "Output x 0x%x not equal to expected 0x%x" % (out_x, x)
-
-
-def check_case_merge(dut, width, a, b):
- yield dut.a.eq(a)
- yield dut.b.eq(b)
- yield
-
- x = (a >> b) & ((1<<width)-1) # actual shift
- if (a & ((2<<b)-1)) != 0: # mask for sticky bit
- x |= 1 # set LSB
-
- out_x = yield dut.x
- assert out_x == x, \
- "\nshift %d\nInput\n%+32s\nOutput x\n%+32s != \n%+32s" % \
- (b, bin(a), bin(out_x), bin(x))
-
-def testmerge(dut):
- for i in range(32):
- for j in range(1000):
- a = randint(0, (1<<32)-1)
- yield from check_case_merge(dut, 32, a, i)
-
-def testbench(dut):
- for i in range(32):
- for j in range(1000):
- a = randint(0, (1<<32)-1)
- yield from check_case(dut, 32, a, i)
-
-def testbenchr(dut):
- for i in range(32):
- for j in range(1000):
- a = randint(0, (1<<32)-1)
- yield from check_caser(dut, 32, a, i)
-
-if __name__ == '__main__':
- dut = MultiShiftRMergeMod(width=32)
- run_simulation(dut, testmerge(dut), vcd_name="test_multishiftmerge.vcd")
- dut = MultiShiftModRMod(width=32)
- run_simulation(dut, testbenchr(dut), vcd_name="test_multishift.vcd")
-
- dut = MultiShiftModR(width=32)
- run_simulation(dut, testbenchr(dut), vcd_name="test_multishift.vcd")
-
- dut = MultiShiftModL(width=32)
- run_simulation(dut, testbench(dut), vcd_name="test_multishift.vcd")
-
+++ /dev/null
-from random import randint
-from math import log
-from nmigen import Module, Signal, Cat, Elaboratable
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from multipipe import CombMuxOutPipe
-from singlepipe import SimpleHandshake, PassThroughHandshake, RecordObject
-
-
-class PassInData(RecordObject):
- def __init__(self):
- RecordObject.__init__(self)
- self.mid = Signal(2, reset_less=True)
- self.data = Signal(16, reset_less=True)
-
-
-class PassThroughStage:
-
- def ispec(self):
- return PassInData()
-
- def ospec(self, name):
- return Signal(16, name="%s_dout" % name, reset_less=True)
-
- def process(self, i):
- return i.data
-
-
-class PassThroughDataStage:
- def ispec(self):
- return PassInData()
- def ospec(self):
- return self.ispec() # same as ospec
-
- def process(self, i):
- return i # pass-through
-
-
-
-class PassThroughPipe(PassThroughHandshake):
- def __init__(self):
- PassThroughHandshake.__init__(self, PassThroughDataStage())
-
-
-class OutputTest:
- def __init__(self, dut):
- self.dut = dut
- self.di = []
- self.do = {}
- self.tlen = 10
- for i in range(self.tlen * dut.num_rows):
- if i < dut.num_rows:
- mid = i
- else:
- mid = randint(0, dut.num_rows-1)
- data = randint(0, 255) + (mid<<8)
- if mid not in self.do:
- self.do[mid] = []
- self.di.append((data, mid))
- self.do[mid].append(data)
-
- def send(self):
- for i in range(self.tlen * dut.num_rows):
- op2 = self.di[i][0]
- mid = self.di[i][1]
- rs = dut.p
- yield rs.valid_i.eq(1)
- yield rs.data_i.data.eq(op2)
- yield rs.data_i.mid.eq(mid)
- yield
- o_p_ready = yield rs.ready_o
- while not o_p_ready:
- yield
- o_p_ready = yield rs.ready_o
-
- print ("send", mid, i, hex(op2))
-
- yield rs.valid_i.eq(0)
- # wait random period of time before queueing another value
- for i in range(randint(0, 3)):
- yield
-
- yield rs.valid_i.eq(0)
-
- def rcv(self, mid):
- out_i = 0
- count = 0
- stall_range = randint(0, 3)
- while out_i != len(self.do[mid]):
- count += 1
- assert count != 2000, "timeout: too long"
- n = self.dut.n[mid]
- yield n.ready_i.eq(1)
- yield
- o_n_valid = yield n.valid_o
- i_n_ready = yield n.ready_i
- if not o_n_valid or not i_n_ready:
- continue
-
- out_v = yield n.data_o
-
- print ("recv", mid, out_i, hex(out_v))
-
- assert self.do[mid][out_i] == out_v # pass-through data
-
- out_i += 1
-
- if randint(0, 5) == 0:
- stall_range = randint(0, 3)
- stall = randint(0, stall_range) != 0
- if stall:
- yield n.ready_i.eq(0)
- for i in range(stall_range):
- yield
-
-
-class TestPriorityMuxPipe(CombMuxOutPipe):
- def __init__(self, num_rows):
- self.num_rows = num_rows
- stage = PassThroughStage()
- CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
-
-
-class TestSyncToPriorityPipe(Elaboratable):
- def __init__(self):
- self.num_rows = 4
- self.pipe = PassThroughPipe()
- self.muxpipe = TestPriorityMuxPipe(self.num_rows)
-
- self.p = self.pipe.p
- self.n = self.muxpipe.n
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.pipe = self.pipe
- m.submodules.muxpipe = self.muxpipe
- m.d.comb += self.pipe.n.connect_to_next(self.muxpipe.p)
- return m
-
- def ports(self):
- res = [self.p.valid_i, self.p.ready_o] + \
- self.p.data_i.ports()
- for i in range(len(self.n)):
- res += [self.n[i].ready_i, self.n[i].valid_o] + \
- [self.n[i].data_o]
- #self.n[i].data_o.ports()
- return res
-
-
-if __name__ == '__main__':
- dut = TestSyncToPriorityPipe()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_outmux_pipe.il", "w") as f:
- f.write(vl)
-
- test = OutputTest(dut)
- run_simulation(dut, [test.rcv(1), test.rcv(0),
- test.rcv(3), test.rcv(2),
- test.send()],
- vcd_name="test_outmux_pipe.vcd")
-
+++ /dev/null
-from random import randint
-from math import log
-from nmigen import Module, Signal, Cat
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from singlepipe import PassThroughStage
-from multipipe import (CombMultiInPipeline, PriorityCombMuxInPipe)
-
-
-class PassData:
- def __init__(self):
- self.mid = Signal(2, reset_less=True)
- self.idx = Signal(6, reset_less=True)
- self.data = Signal(16, reset_less=True)
-
- def eq(self, i):
- return [self.mid.eq(i.mid), self.idx.eq(i.idx), self.data.eq(i.data)]
-
- def ports(self):
- return [self.mid, self.idx, self.data]
-
-
-def testbench(dut):
- stb = yield dut.out_op.stb
- assert stb == 0
- ack = yield dut.out_op.ack
- assert ack == 0
-
- # set row 1 input 0
- yield dut.rs[1].in_op[0].eq(5)
- yield dut.rs[1].stb.eq(0b01) # strobe indicate 1st op ready
- #yield dut.rs[1].ack.eq(1)
- yield
-
- # check row 1 output (should be inactive)
- decode = yield dut.rs[1].out_decode
- assert decode == 0
- if False:
- op0 = yield dut.rs[1].out_op[0]
- op1 = yield dut.rs[1].out_op[1]
- assert op0 == 0 and op1 == 0
-
- # output should be inactive
- out_stb = yield dut.out_op.stb
- assert out_stb == 1
-
- # set row 0 input 1
- yield dut.rs[1].in_op[1].eq(6)
- yield dut.rs[1].stb.eq(0b11) # strobe indicate both ops ready
-
- # set acknowledgement of output... takes 1 cycle to respond
- yield dut.out_op.ack.eq(1)
- yield
- yield dut.out_op.ack.eq(0) # clear ack on output
- yield dut.rs[1].stb.eq(0) # clear row 1 strobe
-
- # output strobe should be active, MID should be 0 until "ack" is set...
- out_stb = yield dut.out_op.stb
- assert out_stb == 1
- out_mid = yield dut.mid
- assert out_mid == 0
-
- # ... and output should not yet be passed through either
- op0 = yield dut.out_op.v[0]
- op1 = yield dut.out_op.v[1]
- assert op0 == 0 and op1 == 0
-
- # wait for out_op.ack to activate...
- yield dut.rs[1].stb.eq(0b00) # set row 1 strobes to zero
- yield
-
- # *now* output should be passed through
- op0 = yield dut.out_op.v[0]
- op1 = yield dut.out_op.v[1]
- assert op0 == 5 and op1 == 6
-
- # set row 2 input
- yield dut.rs[2].in_op[0].eq(3)
- yield dut.rs[2].in_op[1].eq(4)
- yield dut.rs[2].stb.eq(0b11) # strobe indicate 1st op ready
- yield dut.out_op.ack.eq(1) # set output ack
- yield
- yield dut.rs[2].stb.eq(0) # clear row 2 strobe
- yield dut.out_op.ack.eq(0) # set output ack
- yield
- op0 = yield dut.out_op.v[0]
- op1 = yield dut.out_op.v[1]
- assert op0 == 3 and op1 == 4, "op0 %d op1 %d" % (op0, op1)
- out_mid = yield dut.mid
- assert out_mid == 2
-
- # set row 0 and 3 input
- yield dut.rs[0].in_op[0].eq(9)
- yield dut.rs[0].in_op[1].eq(8)
- yield dut.rs[0].stb.eq(0b11) # strobe indicate 1st op ready
- yield dut.rs[3].in_op[0].eq(1)
- yield dut.rs[3].in_op[1].eq(2)
- yield dut.rs[3].stb.eq(0b11) # strobe indicate 1st op ready
-
- # set acknowledgement of output... takes 1 cycle to respond
- yield dut.out_op.ack.eq(1)
- yield
- yield dut.rs[0].stb.eq(0) # clear row 1 strobe
- yield
- out_mid = yield dut.mid
- assert out_mid == 0, "out mid %d" % out_mid
-
- yield
- yield dut.rs[3].stb.eq(0) # clear row 1 strobe
- yield dut.out_op.ack.eq(0) # clear ack on output
- yield
- out_mid = yield dut.mid
- assert out_mid == 3, "out mid %d" % out_mid
-
-
-class InputTest:
- def __init__(self, dut):
- self.dut = dut
- self.di = {}
- self.do = {}
- self.tlen = 10
- for mid in range(dut.num_rows):
- self.di[mid] = {}
- self.do[mid] = {}
- for i in range(self.tlen):
- self.di[mid][i] = randint(0, 100) + (mid<<8)
- self.do[mid][i] = self.di[mid][i]
-
- def send(self, mid):
- for i in range(self.tlen):
- op2 = self.di[mid][i]
- rs = dut.p[mid]
- yield rs.valid_i.eq(1)
- yield rs.data_i.data.eq(op2)
- yield rs.data_i.idx.eq(i)
- yield rs.data_i.mid.eq(mid)
- yield
- o_p_ready = yield rs.ready_o
- while not o_p_ready:
- yield
- o_p_ready = yield rs.ready_o
-
- print ("send", mid, i, hex(op2))
-
- yield rs.valid_i.eq(0)
- # wait random period of time before queueing another value
- for i in range(randint(0, 3)):
- yield
-
- yield rs.valid_i.eq(0)
- ## wait random period of time before queueing another value
- #for i in range(randint(0, 3)):
- # yield
-
- #send_range = randint(0, 3)
- #if send_range == 0:
- # send = True
- #else:
- # send = randint(0, send_range) != 0
-
- def rcv(self):
- while True:
- #stall_range = randint(0, 3)
- #for j in range(randint(1,10)):
- # stall = randint(0, stall_range) != 0
- # yield self.dut.n[0].ready_i.eq(stall)
- # yield
- n = self.dut.n
- yield n.ready_i.eq(1)
- yield
- o_n_valid = yield n.valid_o
- i_n_ready = yield n.ready_i
- if not o_n_valid or not i_n_ready:
- continue
-
- mid = yield n.data_o.mid
- out_i = yield n.data_o.idx
- out_v = yield n.data_o.data
-
- print ("recv", mid, out_i, hex(out_v))
-
- # see if this output has occurred already, delete it if it has
- assert out_i in self.do[mid], "out_i %d not in array %s" % \
- (out_i, repr(self.do[mid]))
- assert self.do[mid][out_i] == out_v # pass-through data
- del self.do[mid][out_i]
-
- # check if there's any more outputs
- zerolen = True
- for (k, v) in self.do.items():
- if v:
- zerolen = False
- if zerolen:
- break
-
-
-class TestPriorityMuxPipe(PriorityCombMuxInPipe):
- def __init__(self):
- self.num_rows = 4
- def iospecfn(): return PassData()
- stage = PassThroughStage(iospecfn)
- PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
-
-
-if __name__ == '__main__':
- dut = TestPriorityMuxPipe()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_inputgroup_multi.il", "w") as f:
- f.write(vl)
- #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")
-
- test = InputTest(dut)
- run_simulation(dut, [test.send(1), test.send(0),
- test.send(3), test.send(2),
- test.rcv()],
- vcd_name="test_inputgroup_multi.vcd")
-
+++ /dev/null
-from random import randint
-from random import seed
-from operator import add
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from fadd_state import FPADD
-
-from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
- is_inf, is_pos_inf, is_neg_inf,
- match, get_case, check_case, run_test,
- run_edge_cases, run_corner_cases)
-
-def testbench(dut):
- yield from check_case(dut, 0xFFFFFFFF, 0xC63B800A, 0xFFC00000)
- yield from check_case(dut, 0xFF800000, 0x7F800000, 0xFFC00000)
- #yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
- yield from check_case(dut, 0x7F800000, 0xFF800000, 0xFFC00000)
- yield from check_case(dut, 0x42540000, 0xC2540000, 0x00000000)
- yield from check_case(dut, 0xC2540000, 0x42540000, 0x00000000)
- yield from check_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000)
- yield from check_case(dut, 0x82471f51, 0x243985f, 0x801c3790)
- yield from check_case(dut, 0, 0, 0)
- yield from check_case(dut, 0x40000000, 0xc0000000, 0x00000000)
- yield from check_case(dut, 0x3F800000, 0x40000000, 0x40400000)
- yield from check_case(dut, 0x40000000, 0x3F800000, 0x40400000)
- yield from check_case(dut, 0x447A0000, 0x4488B000, 0x4502D800)
- yield from check_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E)
- yield from check_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E)
- yield from check_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6)
- yield from check_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6)
- yield from check_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6)
- yield from check_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6)
- yield from check_case(dut, 0x7F800000, 0x00000000, 0x7F800000)
- yield from check_case(dut, 0x00000000, 0x7F800000, 0x7F800000)
- yield from check_case(dut, 0xFF800000, 0x00000000, 0xFF800000)
- yield from check_case(dut, 0x00000000, 0xFF800000, 0xFF800000)
- yield from check_case(dut, 0x7F800000, 0x7F800000, 0x7F800000)
- yield from check_case(dut, 0xFF800000, 0xFF800000, 0xFF800000)
- yield from check_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7)
- yield from check_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E)
- yield from check_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE)
- yield from check_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE)
- yield from check_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE)
- yield from check_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD)
- yield from check_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF)
- yield from check_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358)
- yield from check_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358)
- yield from check_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5)
- yield from check_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5)
- #yield from check_case(dut, 1, 0, 1)
- #yield from check_case(dut, 1, 1, 1)
-
- count = 0
-
- #regression tests
- stimulus_a = [0x22cb525a, 0x40000000, 0x83e73d5c, 0xbf9b1e94, 0x34082401,
- 0x5e8ef81, 0x5c75da81, 0x2b017]
- stimulus_b = [0xadd79efa, 0xC0000000, 0x1c800000, 0xc038ed3a, 0xb328cd45,
- 0x114f3db, 0x2f642a39, 0xff3807ab]
- yield from run_test(dut, stimulus_a, stimulus_b, add, get_case)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- yield from run_corner_cases(dut, count, add, get_case)
- yield from run_edge_cases(dut, count, add, get_case)
-
-if __name__ == '__main__':
- dut = FPADD(width=32, single_cycle=True)
- run_simulation(dut, testbench(dut), vcd_name="test_state_add.vcd")
-
+++ /dev/null
-from random import randint
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog
-
-from inputgroup import FPGetSyncOpsMod
-
-
-def testbench(dut):
- stb = yield dut.stb
- assert stb == 0
- ack = yield dut.ack
- assert ack == 0
-
- yield dut.in_op[0].eq(5)
- yield dut.stb.eq(0b01)
- yield dut.ack.eq(1)
- yield
- yield
- decode = yield dut.out_decode
- assert decode == 0
-
- op0 = yield dut.out_op[0]
- op1 = yield dut.out_op[1]
- assert op0 == 0 and op1 == 0
-
- yield dut.in_op[1].eq(6)
- yield dut.stb.eq(0b11)
- yield
- yield
-
- op0 = yield dut.out_op[0]
- op1 = yield dut.out_op[1]
- assert op0 == 5 and op1 == 6
-
- yield dut.ack.eq(0)
- yield
-
- op0 = yield dut.out_op[0]
- op1 = yield dut.out_op[1]
- assert op0 == 0 and op1 == 0
-
-if __name__ == '__main__':
- dut = FPGetSyncOpsMod(width=32)
- run_simulation(dut, testbench(dut), vcd_name="test_getsyncops.vcd")
- vl = verilog.convert(dut, ports=dut.ports())
- with open("test_getsyncops.v", "w") as f:
- f.write(vl)
+++ /dev/null
-import sys
-from random import randint
-from random import seed
-
-from sfpy import Float64
-
-def get_mantissa(x):
- return x & 0x000fffffffffffff
-
-def get_exponent(x):
- return ((x & 0x7ff0000000000000) >> 52) - 1023
-
-def get_sign(x):
- return ((x & 0x8000000000000000) >> 63)
-
-def is_nan(x):
- return get_exponent(x) == 1024 and get_mantissa(x) != 0
-
-def is_inf(x):
- return get_exponent(x) == 1024 and get_mantissa(x) == 0
-
-def is_pos_inf(x):
- return is_inf(x) and not get_sign(x)
-
-def is_neg_inf(x):
- return is_inf(x) and get_sign(x)
-
-def match(x, y):
- return (
- (is_pos_inf(x) and is_pos_inf(y)) or
- (is_neg_inf(x) and is_neg_inf(y)) or
- (is_nan(x) and is_nan(y)) or
- (x == y)
- )
-
-def get_case(dut, a, b):
- yield dut.in_a.v.eq(a)
- yield dut.in_a.stb.eq(1)
- yield
- yield
- a_ack = (yield dut.in_a.ack)
- assert a_ack == 0
- yield dut.in_b.v.eq(b)
- yield dut.in_b.stb.eq(1)
- b_ack = (yield dut.in_b.ack)
- assert b_ack == 0
-
- while True:
- yield
- out_z_stb = (yield dut.out_z.stb)
- if not out_z_stb:
- continue
- yield dut.in_a.stb.eq(0)
- yield dut.in_b.stb.eq(0)
- yield dut.out_z.ack.eq(1)
- yield
- yield dut.out_z.ack.eq(0)
- yield
- yield
- break
-
- out_z = yield dut.out_z.v
- return out_z
-
-def check_case(dut, a, b, z):
- out_z = yield from get_case(dut, a, b)
- assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
-
-
-def run_test(dut, stimulus_a, stimulus_b, op):
-
- expected_responses = []
- actual_responses = []
- for a, b in zip(stimulus_a, stimulus_b):
- af = Float64.from_bits(a)
- bf = Float64.from_bits(b)
- z = op(af, bf)
- expected_responses.append(z.get_bits())
- #print (af, bf, z)
- actual = yield from get_case(dut, a, b)
- actual_responses.append(actual)
-
- if len(actual_responses) < len(expected_responses):
- print ("Fail ... not enough results")
- exit(0)
-
- for exp, act, a, b in zip(expected_responses, actual_responses,
- stimulus_a, stimulus_b):
- passed = match(exp, act)
-
- if not passed:
-
- print ("Fail ... expected:", hex(exp), "actual:", hex(act))
-
- print (hex(a))
- print ("a mantissa:", a & 0x000fffffffffffff)
- print ("a exponent:", ((a & 0x7ff0000000000000) >> 52)\
- - 1023)
- print ("a sign:", ((a & 0x8000000000000000) >> 63))
-
- print (hex(b))
- print ("b mantissa:", b & 0x000fffffffffffff)
- print ("b exponent:", ((b & 0x7ff0000000000000) >> 52)\
- - 1023)
- print ("b sign:", ((b & 0x8000000000000000) >> 63))
-
- print (hex(exp))
- print ("expected mantissa:", exp & 0x000fffffffffffff)
- print ("expected exponent:", ((exp & 0x7ff0000000000000) >> 52)\
- - 1023)
- print ("expected sign:", ((exp & 0x8000000000000000) >> 63))
-
- print (hex(act))
- print ("actual mantissa:", act & 0x000fffffffffffff)
- print ("actual exponent:", ((act & 0x7ff0000000000000) >> 52)\
- - 1023)
- print ("actual sign:", ((act & 0x8000000000000000) >> 63))
-
- sys.exit(0)
-
-
-def run_corner_cases(dut, count, op):
- #corner cases
- from itertools import permutations
- stimulus_a = [i[0] for i in permutations([
- 0x8000000000000000,
- 0x0000000000000000,
- 0x7ff8000000000000,
- 0xfff8000000000000,
- 0x7ff0000000000000,
- 0xfff0000000000000
- ], 2)]
- stimulus_b = [i[1] for i in permutations([
- 0x8000000000000000,
- 0x0000000000000000,
- 0x7ff8000000000000,
- 0xfff8000000000000,
- 0x7ff0000000000000,
- 0xfff0000000000000
- ], 2)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
-
-def run_edge_cases(dut, count, op):
- #edge cases
- stimulus_a = [0x8000000000000000 for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0x0000000000000000 for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x8000000000000000 for i in range(1000)]
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x0000000000000000 for i in range(1000)]
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0x7FF8000000000000 for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0xFFF8000000000000 for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x7FF8000000000000 for i in range(1000)]
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0xFFF8000000000000 for i in range(1000)]
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0x7FF0000000000000 for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0xFFF0000000000000 for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x7FF0000000000000 for i in range(1000)]
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0xFFF0000000000000 for i in range(1000)]
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- #seed(0)
- for i in range(100000):
- stimulus_a = [randint(0, 1<<64) for i in range(1000)]
- stimulus_b = [randint(0, 1<<64) for i in range(1000)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += 1000
- print (count, "random vectors passed")
-
+++ /dev/null
-from random import randint
-from random import seed
-
-import sys
-from sfpy import Float16
-
-def get_mantissa(x):
- return 0x3ff & x
-
-def get_exponent(x):
- return ((x & 0xf800) >> 11) - 15
-
-def get_sign(x):
- return ((x & 0x8000) >> 15)
-
-def is_nan(x):
- return get_exponent(x) == 16 and get_mantissa(x) != 0
-
-def is_inf(x):
- return get_exponent(x) == 16 and get_mantissa(x) == 0
-
-def is_pos_inf(x):
- return is_inf(x) and not get_sign(x)
-
-def is_neg_inf(x):
- return is_inf(x) and get_sign(x)
-
-def match(x, y):
- return (
- (is_pos_inf(x) and is_pos_inf(y)) or
- (is_neg_inf(x) and is_neg_inf(y)) or
- (is_nan(x) and is_nan(y)) or
- (x == y)
- )
-
-def get_case(dut, a, b):
- yield dut.in_a.v.eq(a)
- yield dut.in_a.stb.eq(1)
- yield
- yield
- a_ack = (yield dut.in_a.ack)
- assert a_ack == 0
- yield dut.in_b.v.eq(b)
- yield dut.in_b.stb.eq(1)
- b_ack = (yield dut.in_b.ack)
- assert b_ack == 0
-
- while True:
- yield
- out_z_stb = (yield dut.out_z.stb)
- if not out_z_stb:
- continue
- yield dut.in_a.stb.eq(0)
- yield dut.in_b.stb.eq(0)
- yield dut.out_z.ack.eq(1)
- yield
- yield dut.out_z.ack.eq(0)
- yield
- yield
- break
-
- out_z = yield dut.out_z.v
- return out_z
-
-def check_case(dut, a, b, z):
- out_z = yield from get_case(dut, a, b)
- assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
-
-
-def run_test(dut, stimulus_a, stimulus_b, op):
-
- expected_responses = []
- actual_responses = []
- for a, b in zip(stimulus_a, stimulus_b):
- af = Float16.from_bits(a)
- bf = Float16.from_bits(b)
- z = op(af, bf)
- expected_responses.append(z.get_bits())
- #print (af, bf, z)
- actual = yield from get_case(dut, a, b)
- actual_responses.append(actual)
-
- if len(actual_responses) < len(expected_responses):
- print ("Fail ... not enough results")
- exit(0)
-
- for expected, actual, a, b in zip(expected_responses, actual_responses,
- stimulus_a, stimulus_b):
- passed = match(expected, actual)
-
- if not passed:
-
- print ("Fail ... expected:", hex(expected), "actual:", hex(actual))
-
- print (hex(a))
- print ("a mantissa:", get_mantissa(a))
- print ("a exponent:", get_exponent(a))
- print ("a sign:", get_sign(a))
-
- print (hex(b))
- print ("b mantissa:", get_mantissa(b))
- print ("b exponent:", get_exponent(b))
- print ("b sign:", get_sign(b))
-
- print (hex(expected))
- print ("expected mantissa:", get_mantissa(expected))
- print ("expected exponent:", get_exponent(expected))
- print ("expected sign:", get_sign(expected))
-
- print (hex(actual))
- print ("actual mantissa:", get_mantissa(actual))
- print ("actual exponent:", get_exponent(actual))
- print ("actual sign:", get_sign(actual))
-
- sys.exit(0)
-
-def run_corner_cases(dut, count, op):
- #corner cases
- corners = [0x8000, 0x0000, 0x7800, 0xf800, 0x7c00, 0xfc00]
- from itertools import permutations
- stimulus_a = [i[0] for i in permutations(corners, 2)]
- stimulus_b = [i[1] for i in permutations(corners, 2)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
-
-def run_edge_cases(dut, count, op):
- maxint16 = 1<<16
- maxcount = 10
- #edge cases
- stimulus_a = [0x8000 for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0x0000 for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x8000 for i in range(maxcount)]
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x0000 for i in range(maxcount)]
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0x7800 for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0xF800 for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x7800 for i in range(maxcount)]
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0xF800 for i in range(maxcount)]
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0x7C00 for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_a = [0xFC00 for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0x7C00 for i in range(maxcount)]
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- stimulus_b = [0xFC00 for i in range(maxcount)]
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
- #seed(0)
- for i in range(100000):
- stimulus_a = [randint(0, maxint16-1) for i in range(maxcount)]
- stimulus_b = [randint(0, maxint16-1) for i in range(maxcount)]
- yield from run_test(dut, stimulus_a, stimulus_b, op)
- count += maxcount
- print (count, "random vectors passed")
-
+++ /dev/null
-from random import randint
-from random import seed
-
-import sys
-from sfpy import Float32
-
-def get_mantissa(x):
- return 0x7fffff & x
-
-def get_exponent(x):
- return ((x & 0x7f800000) >> 23) - 127
-
-def set_exponent(x, e):
- return (x & ~0x7f800000) | ((e+127) << 23)
-
-def get_sign(x):
- return ((x & 0x80000000) >> 31)
-
-def is_nan(x):
- return get_exponent(x) == 128 and get_mantissa(x) != 0
-
-def is_inf(x):
- return get_exponent(x) == 128 and get_mantissa(x) == 0
-
-def is_pos_inf(x):
- return is_inf(x) and not get_sign(x)
-
-def is_neg_inf(x):
- return is_inf(x) and get_sign(x)
-
-def match(x, y):
- return (
- (is_pos_inf(x) and is_pos_inf(y)) or
- (is_neg_inf(x) and is_neg_inf(y)) or
- (is_nan(x) and is_nan(y)) or
- (x == y)
- )
-
-def get_rs_case(dut, a, b, mid):
- in_a, in_b = dut.rs[0]
- out_z = dut.res[0]
- yield dut.ids.in_mid.eq(mid)
- yield in_a.v.eq(a)
- yield in_a.valid_i.eq(1)
- yield
- yield
- yield
- yield
- a_ack = (yield in_a.ready_o)
- assert a_ack == 0
-
- yield in_a.valid_i.eq(0)
-
- yield in_b.v.eq(b)
- yield in_b.valid_i.eq(1)
- yield
- yield
- b_ack = (yield in_b.ready_o)
- assert b_ack == 0
-
- yield in_b.valid_i.eq(0)
-
- yield out_z.ready_i.eq(1)
-
- while True:
- out_z_stb = (yield out_z.valid_o)
- if not out_z_stb:
- yield
- continue
- vout_z = yield out_z.v
- #out_mid = yield dut.ids.out_mid
- yield out_z.ready_i.eq(0)
- yield
- break
-
- return vout_z, mid
-
-def check_rs_case(dut, a, b, z, mid=None):
- if mid is None:
- mid = randint(0, 6)
- mid = 0
- out_z, out_mid = yield from get_rs_case(dut, a, b, mid)
- assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
- assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid)
-
-
-def get_case(dut, a, b, mid):
- #yield dut.in_mid.eq(mid)
- yield dut.in_a.v.eq(a)
- yield dut.in_a.valid_i_test.eq(1)
- yield
- yield
- yield
- yield
- a_ack = (yield dut.in_a.ready_o)
- assert a_ack == 0
-
- yield dut.in_a.valid_i.eq(0)
-
- yield dut.in_b.v.eq(b)
- yield dut.in_b.valid_i.eq(1)
- yield
- yield
- b_ack = (yield dut.in_b.ready_o)
- assert b_ack == 0
-
- yield dut.in_b.valid_i.eq(0)
-
- yield dut.out_z.ready_i.eq(1)
-
- while True:
- out_z_stb = (yield dut.out_z.valid_o)
- if not out_z_stb:
- yield
- continue
- out_z = yield dut.out_z.v
- #out_mid = yield dut.out_mid
- yield dut.out_z.ready_i.eq(0)
- yield
- break
-
- return out_z, mid # TODO: mid
-
-def check_case(dut, a, b, z, mid=None):
- if mid is None:
- mid = randint(0, 6)
- mid = 0
- out_z, out_mid = yield from get_case(dut, a, b, mid)
- assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
- assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid)
-
-
-def run_test(dut, stimulus_a, stimulus_b, op, get_case_fn):
-
- expected_responses = []
- actual_responses = []
- for a, b in zip(stimulus_a, stimulus_b):
- mid = randint(0, 6)
- mid = 0
- af = Float32.from_bits(a)
- bf = Float32.from_bits(b)
- z = op(af, bf)
- expected_responses.append((z.get_bits(), mid))
- actual = yield from get_case_fn(dut, a, b, mid)
- actual_responses.append(actual)
-
- if len(actual_responses) < len(expected_responses):
- print ("Fail ... not enough results")
- exit(0)
-
- for expected, actual, a, b in zip(expected_responses, actual_responses,
- stimulus_a, stimulus_b):
- passed = match(expected[0], actual[0])
- if expected[1] != actual[1]: # check mid
- print ("MID failed", expected[1], actual[1])
- sys.exit(0)
-
- if not passed:
-
- expected = expected[0]
- actual = actual[0]
- print ("Fail ... expected:", hex(expected), "actual:", hex(actual))
-
- print (hex(a))
- print ("a mantissa:", a & 0x7fffff)
- print ("a exponent:", ((a & 0x7f800000) >> 23) - 127)
- print ("a sign:", ((a & 0x80000000) >> 31))
-
- print (hex(b))
- print ("b mantissa:", b & 0x7fffff)
- print ("b exponent:", ((b & 0x7f800000) >> 23) - 127)
- print ("b sign:", ((b & 0x80000000) >> 31))
-
- print (hex(expected))
- print ("expected mantissa:", expected & 0x7fffff)
- print ("expected exponent:", ((expected & 0x7f800000) >> 23) - 127)
- print ("expected sign:", ((expected & 0x80000000) >> 31))
-
- print (hex(actual))
- print ("actual mantissa:", actual & 0x7fffff)
- print ("actual exponent:", ((actual & 0x7f800000) >> 23) - 127)
- print ("actual sign:", ((actual & 0x80000000) >> 31))
-
- sys.exit(0)
-
-corner_cases = [0x80000000, 0x00000000, 0x7f800000, 0xff800000,
- 0x7fc00000, 0xffc00000]
-
-def run_corner_cases(dut, count, op, get_case_fn):
- #corner cases
- from itertools import permutations
- stimulus_a = [i[0] for i in permutations(corner_cases, 2)]
- stimulus_b = [i[1] for i in permutations(corner_cases, 2)]
- yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn)
- count += len(stimulus_a)
- print (count, "vectors passed")
-
-def run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn):
- yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn)
- yield from run_test(dut, stimulus_b, stimulus_a, op, get_case_fn)
-
-def run_cases(dut, count, op, fixed_num, num_entries, get_case_fn):
- if isinstance(fixed_num, int):
- stimulus_a = [fixed_num for i in range(num_entries)]
- report = hex(fixed_num)
- else:
- stimulus_a = fixed_num
- report = "random"
-
- stimulus_b = [randint(0, 1<<32) for i in range(num_entries)]
- yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
- count += len(stimulus_a)
- print (count, "vectors passed 2^32", report)
-
- # non-canonical NaNs.
- stimulus_b = [set_exponent(randint(0, 1<<32), 128) \
- for i in range(num_entries)]
- yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
- count += len(stimulus_a)
- print (count, "vectors passed Non-Canonical NaN", report)
-
- # -127
- stimulus_b = [set_exponent(randint(0, 1<<32), -127) \
- for i in range(num_entries)]
- yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
- count += len(stimulus_a)
- print (count, "vectors passed exp=-127", report)
-
- # nearly zero
- stimulus_b = [set_exponent(randint(0, 1<<32), -126) \
- for i in range(num_entries)]
- yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
- count += len(stimulus_a)
- print (count, "vectors passed exp=-126", report)
-
- # nearly inf
- stimulus_b = [set_exponent(randint(0, 1<<32), 127) \
- for i in range(num_entries)]
- yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
- count += len(stimulus_a)
- print (count, "vectors passed exp=127", report)
-
- return count
-
-def run_edge_cases(dut, count, op, get_case_fn):
- #edge cases
- for testme in corner_cases:
- count = yield from run_cases(dut, count, op, testme, 10, get_case_fn)
-
- for i in range(100000):
- stimulus_a = [randint(0, 1<<32) for i in range(10)]
- count = yield from run_cases(dut, count, op, stimulus_a, 10,
- get_case_fn)
- return count
-
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from math import log
+from nmigen import Module
+from nmigen.cli import main, verilog
+
+from singlepipe import PassThroughStage
+from multipipe import CombMuxOutPipe
+from multipipe import PriorityCombMuxInPipe
+
+from fpcommon.getop import FPADDBaseData
+from fpcommon.denorm import FPSCData
+from fpcommon.pack import FPPackData
+from fpcommon.normtopack import FPNormToPack
+from fpadd.specialcases import FPAddSpecialCasesDeNorm
+from fpadd.addstages import FPAddAlignSingleAdd
+
+
+def num_bits(n):
+ return int(log(n) / log(2))
+
+class FPADDInMuxPipe(PriorityCombMuxInPipe):
+ def __init__(self, num_rows, iospecfn):
+ self.num_rows = num_rows
+ stage = PassThroughStage(iospecfn)
+ PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
+
+
+class FPADDMuxOutPipe(CombMuxOutPipe):
+ def __init__(self, num_rows, iospecfn):
+ self.num_rows = num_rows
+ stage = PassThroughStage(iospecfn)
+ CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
+
+
+class ReservationStations:
+ """ Reservation-Station pipeline
+
+ Input: num_rows - number of input and output Reservation Stations
+
+ Requires: the addition of an "alu" object, an i_specfn and an o_specfn
+
+ * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
+ * ALU pipeline
+ * fan-out on outputs (an array of FPPackData: z,mid)
+
+ Fan-in and Fan-out are combinatorial.
+ """
+ def __init__(self, num_rows):
+ self.num_rows = num_rows
+ self.inpipe = FPADDInMuxPipe(num_rows, self.i_specfn) # fan-in
+ self.outpipe = FPADDMuxOutPipe(num_rows, self.o_specfn) # fan-out
+
+ self.p = self.inpipe.p # kinda annoying,
+ self.n = self.outpipe.n # use pipe in/out as this class in/out
+ self._ports = self.inpipe.ports() + self.outpipe.ports()
+
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.inpipe = self.inpipe
+ m.submodules.alu = self.alu
+ m.submodules.outpipe = self.outpipe
+
+ m.d.comb += self.inpipe.n.connect_to_next(self.alu.p)
+ m.d.comb += self.alu.connect_to_next(self.outpipe)
+
+ return m
+
+ def ports(self):
+ return self._ports
+
+
--- /dev/null
+from nmigen import *
+from nmigen.cli import main
+
+from nmigen_add_experiment import FPADD
+from fpbase import FPOp
+
+
+class Adder:
+ def __init__(self, width):
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.o.eq(self.a + self.b)
+ return m
+
+
+class Subtractor:
+ def __init__(self, width):
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.o.eq(self.a - self.b)
+ return m
+
+
+class ALU:
+ def __init__(self, width):
+ #self.op = Signal()
+ self.a = FPOp(width)
+ self.b = FPOp(width)
+ self.c = FPOp(width)
+ self.z = FPOp(width)
+ self.int_stb = Signal()
+
+ self.add1 = FPADD(width)
+ self.add2 = FPADD(width)
+
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.add1 = self.add1
+ m.submodules.add2 = self.add2
+ # join add1 a to a: add1.in_a = a
+ m.d.comb += self.add1.in_a.chain_from(self.a)
+ # join add1 b to b: add1.in_b = b
+ m.d.comb += self.add1.in_b.chain_from(self.b)
+ # join add2 a to c: add2.in_a = c
+ m.d.comb += self.add2.in_a.chain_from(self.c)
+ # join add2 b to add1 z: add2.in_b = add1.out_z
+ m.d.comb += self.add2.in_b.chain_inv(self.add1.out_z)
+ # join output from add2 to z: z = add2.out_z
+ m.d.comb += self.z.chain_from(self.add2.out_z)
+ # get at add1's stb signal
+ m.d.comb += self.int_stb.eq(self.add1.out_z.stb)
+ #with m.If(self.op):
+ # m.d.comb += self.o.eq(self.sub.o)
+ #with m.Else():
+ # m.d.comb += self.o.eq(self.add.o)
+ return m
+
+
+if __name__ == "__main__":
+ alu = ALU(width=16)
+ main(alu, ports=alu.a.ports() + \
+ alu.b.ports() + \
+ alu.c.ports() + \
+ alu.z.ports())
--- /dev/null
+""" Pipeline and BufferedHandshake examples
+"""
+
+from nmoperator import eq
+from iocontrol import (PrevControl, NextControl)
+from singlepipe import (PrevControl, NextControl, ControlBase,
+ StageCls, Stage, StageChain,
+ BufferedHandshake, UnbufferedPipeline)
+
+from nmigen import Signal, Module
+from nmigen.cli import verilog, rtlil
+
+
+class ExampleAddStage(StageCls):
+ """ an example of how to use the buffered pipeline, as a class instance
+ """
+
+ def ispec(self):
+ """ returns a tuple of input signals which will be the incoming data
+ """
+ return (Signal(16), Signal(16))
+
+ def ospec(self):
+ """ returns an output signal which will happen to contain the sum
+ of the two inputs
+ """
+ return Signal(16)
+
+ def process(self, i):
+        """ Process the input data: sum the two values in the tuple and
+        return the result.
+        """
+ return i[0] + i[1]
+
+
+class ExampleBufPipeAdd(BufferedHandshake):
+ """ an example of how to use the buffered pipeline, using a class instance
+ """
+
+ def __init__(self):
+ addstage = ExampleAddStage()
+ BufferedHandshake.__init__(self, addstage)
+
+
+class ExampleStage(Stage):
+ """ an example of how to use the buffered pipeline, in a static class
+ fashion
+ """
+
+ def ispec():
+ return Signal(16, name="example_input_signal")
+
+ def ospec():
+ return Signal(16, name="example_output_signal")
+
+ def process(i):
+        """ Process the input data and return it incremented by 1.
+        """
+ return i + 1
+
+
+class ExampleStageCls(StageCls):
+ """ an example of how to use the buffered pipeline, in a static class
+ fashion
+ """
+
+ def ispec(self):
+ return Signal(16, name="example_input_signal")
+
+ def ospec(self):
+ return Signal(16, name="example_output_signal")
+
+ def process(self, i):
+        """ Process the input data and return it incremented by 1.
+        """
+ return i + 1
+
+
+class ExampleBufPipe(BufferedHandshake):
+ """ an example of how to use the buffered pipeline.
+ """
+
+ def __init__(self):
+ BufferedHandshake.__init__(self, ExampleStage)
+
+
+class ExamplePipeline(UnbufferedPipeline):
+ """ an example of how to use the unbuffered pipeline.
+ """
+
+ def __init__(self):
+ UnbufferedPipeline.__init__(self, ExampleStage)
+
+
+if __name__ == '__main__':
+ dut = ExampleBufPipe()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_bufpipe.il", "w") as f:
+ f.write(vl)
+
+ dut = ExamplePipeline()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_combpipe.il", "w") as f:
+ f.write(vl)
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase
+
+from singlepipe import eq
+
+
+class FPADD(FPBase):
+
+ def __init__(self, width, single_cycle=False):
+ FPBase.__init__(self)
+ self.width = width
+ self.single_cycle = single_cycle
+
+ self.in_a = FPOp(width)
+ self.in_b = FPOp(width)
+ self.out_z = FPOp(width)
+
+ def elaborate(self, platform=None):
+ """ creates the HDL code-fragment for FPAdd
+ """
+ m = Module()
+
+ # Latches
+ a = FPNumIn(self.in_a, self.width)
+ b = FPNumIn(self.in_b, self.width)
+ z = FPNumOut(self.width, False)
+
+ m.submodules.fpnum_a = a
+ m.submodules.fpnum_b = b
+ m.submodules.fpnum_z = z
+
+ m.d.comb += a.v.eq(self.in_a.v)
+ m.d.comb += b.v.eq(self.in_b.v)
+
+ w = z.m_width + 4
+ tot = Signal(w, reset_less=True) # sticky/round/guard, {mantissa} result, 1 overflow
+
+ of = Overflow()
+
+ m.submodules.overflow = of
+
+ with m.FSM() as fsm:
+
+ # ******
+ # gets operand a
+
+ with m.State("get_a"):
+ res = self.get_op(m, self.in_a, a, "get_b")
+ m.d.sync += eq([a, self.in_a.ack], res)
+
+ # ******
+ # gets operand b
+
+ with m.State("get_b"):
+ res = self.get_op(m, self.in_b, b, "special_cases")
+ m.d.sync += eq([b, self.in_b.ack], res)
+
+ # ******
+ # special cases: NaNs, infs, zeros, denormalised
+ # NOTE: some of these are unique to add. see "Special Operations"
+ # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
+
+ with m.State("special_cases"):
+
+ s_nomatch = Signal()
+ m.d.comb += s_nomatch.eq(a.s != b.s)
+
+ m_match = Signal()
+ m.d.comb += m_match.eq(a.m == b.m)
+
+ # if a is NaN or b is NaN return NaN
+ with m.If(a.is_nan | b.is_nan):
+ m.next = "put_z"
+ m.d.sync += z.nan(1)
+
+ # XXX WEIRDNESS for FP16 non-canonical NaN handling
+ # under review
+
+ ## if a is zero and b is NaN return -b
+ #with m.If(a.is_zero & (a.s==0) & b.is_nan):
+ # m.next = "put_z"
+ # m.d.sync += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
+
+ ## if b is zero and a is NaN return -a
+ #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
+ # m.next = "put_z"
+ # m.d.sync += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
+
+ ## if a is -zero and b is NaN return -b
+ #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
+ # m.next = "put_z"
+ # m.d.sync += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
+
+ ## if b is -zero and a is NaN return -a
+ #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
+ # m.next = "put_z"
+ # m.d.sync += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
+
+ # if a is inf return inf (or NaN)
+ with m.Elif(a.is_inf):
+ m.next = "put_z"
+ m.d.sync += z.inf(a.s)
+ # if a is inf and signs don't match return NaN
+ with m.If(b.exp_128 & s_nomatch):
+ m.d.sync += z.nan(1)
+
+ # if b is inf return inf
+ with m.Elif(b.is_inf):
+ m.next = "put_z"
+ m.d.sync += z.inf(b.s)
+
+ # if a is zero and b zero return signed-a/b
+ with m.Elif(a.is_zero & b.is_zero):
+ m.next = "put_z"
+ m.d.sync += z.create(a.s & b.s, b.e, b.m[3:-1])
+
+ # if a is zero return b
+ with m.Elif(a.is_zero):
+ m.next = "put_z"
+ m.d.sync += z.create(b.s, b.e, b.m[3:-1])
+
+ # if b is zero return a
+ with m.Elif(b.is_zero):
+ m.next = "put_z"
+ m.d.sync += z.create(a.s, a.e, a.m[3:-1])
+
+ # if a equal to -b return zero (+ve zero)
+ with m.Elif(s_nomatch & m_match & (a.e == b.e)):
+ m.next = "put_z"
+ m.d.sync += z.zero(0)
+
+ # Denormalised Number checks
+ with m.Else():
+ m.next = "align"
+ self.denormalise(m, a)
+ self.denormalise(m, b)
+
+ # ******
+ # align.
+
+ with m.State("align"):
+ if not self.single_cycle:
+ # NOTE: this does *not* do single-cycle multi-shifting,
+ # it *STAYS* in the align state until exponents match
+
+ # exponent of a greater than b: shift b down
+ with m.If(a.e > b.e):
+ m.d.sync += b.shift_down()
+ # exponent of b greater than a: shift a down
+ with m.Elif(a.e < b.e):
+ m.d.sync += a.shift_down()
+ # exponents equal: move to next stage.
+ with m.Else():
+ m.next = "add_0"
+ else:
+ # This one however (single-cycle) will do the shift
+ # in one go.
+
+ # XXX TODO: the shifter used here is quite expensive
+ # having only one would be better
+
+ ediff = Signal((len(a.e), True), reset_less=True)
+ ediffr = Signal((len(a.e), True), reset_less=True)
+ m.d.comb += ediff.eq(a.e - b.e)
+ m.d.comb += ediffr.eq(b.e - a.e)
+ with m.If(ediff > 0):
+ m.d.sync += b.shift_down_multi(ediff)
+ # exponent of b greater than a: shift a down
+ with m.Elif(ediff < 0):
+ m.d.sync += a.shift_down_multi(ediffr)
+
+ m.next = "add_0"
+
+ # ******
+ # First stage of add. covers same-sign (add) and subtract
+ # special-casing when mantissas are greater or equal, to
+ # give greatest accuracy.
+
+ with m.State("add_0"):
+ m.next = "add_1"
+ m.d.sync += z.e.eq(a.e)
+ # same-sign (both negative or both positive) add mantissas
+ with m.If(a.s == b.s):
+ m.d.sync += [
+ tot.eq(Cat(a.m, 0) + Cat(b.m, 0)),
+ z.s.eq(a.s)
+ ]
+ # a mantissa greater than b, use a
+ with m.Elif(a.m >= b.m):
+ m.d.sync += [
+ tot.eq(Cat(a.m, 0) - Cat(b.m, 0)),
+ z.s.eq(a.s)
+ ]
+ # b mantissa greater than a, use b
+ with m.Else():
+ m.d.sync += [
+ tot.eq(Cat(b.m, 0) - Cat(a.m, 0)),
+ z.s.eq(b.s)
+ ]
+
+ # ******
+ # Second stage of add: preparation for normalisation.
+ # detects when tot sum is too big (tot[27] is kinda a carry bit)
+
+ with m.State("add_1"):
+ m.next = "normalise_1"
+ # tot[27] gets set when the sum overflows. shift result down
+ with m.If(tot[-1]):
+ m.d.sync += [
+ z.m.eq(tot[4:]),
+ of.m0.eq(tot[4]),
+ of.guard.eq(tot[3]),
+ of.round_bit.eq(tot[2]),
+ of.sticky.eq(tot[1] | tot[0]),
+ z.e.eq(z.e + 1)
+ ]
+ # tot[27] zero case
+ with m.Else():
+ m.d.sync += [
+ z.m.eq(tot[3:]),
+ of.m0.eq(tot[3]),
+ of.guard.eq(tot[2]),
+ of.round_bit.eq(tot[1]),
+ of.sticky.eq(tot[0])
+ ]
+
+ # ******
+ # First stage of normalisation.
+
+ with m.State("normalise_1"):
+ self.normalise_1(m, z, of, "normalise_2")
+
+ # ******
+ # Second stage of normalisation.
+
+ with m.State("normalise_2"):
+ self.normalise_2(m, z, of, "round")
+
+ # ******
+ # rounding stage
+
+ with m.State("round"):
+ self.roundz(m, z, of.roundz)
+ m.next = "corrections"
+
+ # ******
+ # correction stage
+
+ with m.State("corrections"):
+ self.corrections(m, z, "pack")
+
+ # ******
+ # pack stage
+
+ with m.State("pack"):
+ self.pack(m, z, "put_z")
+
+ # ******
+ # put_z stage
+
+ with m.State("put_z"):
+ self.put_z(m, z, self.out_z, "get_a")
+
+ return m
+
+
if __name__ == "__main__":
    # Build a 32-bit (single-precision) adder and hand it to the nmigen
    # command-line runner; use "python fname.py convert -t v" for verilog.
    # NOTE(review): FPADD is not visible in this chunk's imports --
    # presumably defined earlier in this file; confirm.
    alu = FPADD(width=32)
    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #    ports=alu.in_a.ports() + \
    #        alu.in_b.ports() + \
    #        alu.out_z.ports())
--- /dev/null
+from nmigen import Module, Signal, Cat, Mux, Array, Const
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPState
+from fpcommon.getop import FPGetOp
+from singlepipe import eq
+
+
class FPMUL(FPBase):
    """ IEEE754 floating-point multiplier, FSM-based (one operation in
        flight at a time).

        States: get_a -> get_b -> special_cases -> normalise_a ->
        normalise_b -> multiply_0 -> multiply_1 -> normalise_1 ->
        normalise_2 -> round -> corrections -> pack -> put_z.

        * width: bit-width of the IEEE754 format (e.g. 32 for single)
    """

    def __init__(self, width):
        FPBase.__init__(self)
        self.width = width

        # operand/result handshaked ports
        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        # NOTE(review): states list is kept for API symmetry with the
        # adder classes but is not used by elaborate() below.
        self.states = []

    def add_state(self, state):
        # register a state object; returned so callers can chain
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPMUL
        """
        m = Module()

        # Latches: decoded input operands and the output number
        a = FPNumIn(None, self.width, False)
        b = FPNumIn(None, self.width, False)
        z = FPNumOut(self.width, False)

        # full product width: sticky/round/guard bits + (2*mant) - 1
        mw = (z.m_width)*2 - 1 + 3
        product = Signal(mw)

        of = Overflow()  # guard/round/sticky tracking for rounding
        m.submodules.of = of
        m.submodules.a = a
        m.submodules.b = b
        m.submodules.z = z

        m.d.comb += a.v.eq(self.in_a.v)
        m.d.comb += b.v.eq(self.in_b.v)

        with m.FSM() as fsm:

            # ******
            # gets operand a

            with m.State("get_a"):
                res = self.get_op(m, self.in_a, a, "get_b")
                m.d.sync += eq([a, self.in_a.ack], res)

            # ******
            # gets operand b

            with m.State("get_b"):
                res = self.get_op(m, self.in_b, b, "special_cases")
                m.d.sync += eq([b, self.in_b.ack], res)

            # ******
            # special cases

            with m.State("special_cases"):
                # if a or b is NaN return NaN
                with m.If(a.is_nan | b.is_nan):
                    m.next = "put_z"
                    m.d.sync += z.nan(1)
                # if a is inf return inf
                with m.Elif(a.is_inf):
                    m.next = "put_z"
                    m.d.sync += z.inf(a.s ^ b.s)
                    # ...unless b is zero: inf * 0 is NaN (overrides inf,
                    # m.next already set by the enclosing branch)
                    with m.If(b.is_zero):
                        m.d.sync += z.nan(1)
                # if b is inf return inf
                with m.Elif(b.is_inf):
                    m.next = "put_z"
                    m.d.sync += z.inf(a.s ^ b.s)
                    # ...unless a is zero: 0 * inf is NaN
                    with m.If(a.is_zero):
                        m.next = "put_z"
                        m.d.sync += z.nan(1)
                # if a is zero return zero (sign = XOR of signs)
                with m.Elif(a.is_zero):
                    m.next = "put_z"
                    m.d.sync += z.zero(a.s ^ b.s)
                # if b is zero return zero
                with m.Elif(b.is_zero):
                    m.next = "put_z"
                    m.d.sync += z.zero(a.s ^ b.s)
                # Denormalised Number checks
                with m.Else():
                    m.next = "normalise_a"
                    self.denormalise(m, a)
                    self.denormalise(m, b)

            # ******
            # normalise_a

            with m.State("normalise_a"):
                self.op_normalise(m, a, "normalise_b")

            # ******
            # normalise_b

            with m.State("normalise_b"):
                self.op_normalise(m, b, "multiply_0")

            # multiply_0: sign/exponent and full mantissa product.
            # "* 4" shifts the product up so guard/round bits line up
            # for the extraction in multiply_1.
            with m.State("multiply_0"):
                m.next = "multiply_1"
                m.d.sync += [
                    z.s.eq(a.s ^ b.s),
                    z.e.eq(a.e + b.e + 1),
                    product.eq(a.m * b.m * 4)
                ]

            # multiply_1: split product into result mantissa plus
            # guard/round/sticky bits for rounding
            with m.State("multiply_1"):
                mw = z.m_width
                m.next = "normalise_1"
                m.d.sync += [
                    z.m.eq(product[mw+2:]),
                    of.guard.eq(product[mw+1]),
                    of.round_bit.eq(product[mw]),
                    of.sticky.eq(product[0:mw] != 0)
                ]

            # ******
            # First stage of normalisation.
            with m.State("normalise_1"):
                self.normalise_1(m, z, of, "normalise_2")

            # ******
            # Second stage of normalisation.

            with m.State("normalise_2"):
                self.normalise_2(m, z, of, "round")

            # ******
            # rounding stage

            with m.State("round"):
                self.roundz(m, z, of.roundz)
                m.next = "corrections"

            # ******
            # correction stage

            with m.State("corrections"):
                self.corrections(m, z, "pack")

            # ******
            # pack stage
            with m.State("pack"):
                self.pack(m, z, "put_z")

            # ******
            # put_z stage

            with m.State("put_z"):
                self.put_z(m, z, self.out_z, "get_a")

        return m
+
+
if __name__ == "__main__":
    # build a single-precision multiplier and hand it to the nmigen runner
    alu = FPMUL(width=32)
    ports = alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()
    main(alu, ports=ports)
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumBase
+from fpbase import FPState
+from fpcommon.denorm import FPSCData
+
+
class FPAddStage0Data:
    """ Inter-stage record between add0 and add1: the partial result z,
        the early-out flags (out_do_z/oz), the raw mantissa total and
        the multiplexer id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # 4 extra bits on tot: carry plus guard/round/sticky
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assign every field from another FPAddStage0Data """
        ret = [self.z.eq(i.z)]
        ret.append(self.out_do_z.eq(i.out_do_z))
        ret.append(self.oz.eq(i.oz))
        ret.append(self.tot.eq(i.tot))
        ret.append(self.mid.eq(i.mid))
        return ret
+
+
class FPAddStage0Mod(Elaboratable):
    """ first add stage (combinatorial): adds, or subtracts, the two
        aligned mantissas, choosing the larger for subtraction so the
        result is always non-negative.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()   # input: FPSCData (a, b, z, early-out, mid)
        self.o = self.ospec()   # output: FPAddStage0Data

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        # stage API: outputs are whatever elaborate() drives onto self.o
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)    # signs equal
        mge = Signal(reset_less=True)    # mantissa of a >= mantissa of b
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                     ]
        # skip the work entirely when a special-case result was decided
        # same-sign (both negative or both positive) add mantissas
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
                ]

        # pass early-out result and mux id through unchanged
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
+
+
class FPAddStage0(FPState):
    """ First stage of add. covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # fix: FPAddStage0Mod.__init__ takes (width, id_wid); the
        # id_wid argument was previously missing (TypeError at build)
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import FPState
+from fpcommon.postcalc import FPAddStage1Data
+from fpadd.add0 import FPAddStage0Data
+
+
class FPAddStage1Mod(FPState, Elaboratable):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    # NOTE(review): FPState.__init__ is never called here, so the state
    # name is unset; this class appears to be used purely as a
    # combinatorial module -- confirm before relying on FPState API.
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()   # input: FPAddStage0Data (z, tot, ...)
        self.o = self.ospec()   # output: FPAddStage1Data (z, of, ...)

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        # stage API: outputs are whatever elaborate() drives onto self.o
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.z.eq(self.i.z)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(~self.i.out_do_z):
            with m.If(self.i.tot[-1]):
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[4:]),
                    self.o.of.m0.eq(self.i.tot[4]),
                    self.o.of.guard.eq(self.i.tot[3]),
                    self.o.of.round_bit.eq(self.i.tot[2]),
                    self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1)
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[3:]),
                    self.o.of.m0.eq(self.i.tot[3]),
                    self.o.of.guard.eq(self.i.tot[2]),
                    self.o.of.round_bit.eq(self.i.tot[1]),
                    self.o.of.sticky.eq(self.i.tot[0])
                ]

        # pass early-out result and mux id through unchanged
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
+
+
class FPAddStage1(FPState):
    """ FSM wrapper for FPAddStage1Mod: latches the stage-1 result
        (z plus overflow/rounding bits) and raises norm_stb.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # fix: FPAddStage1Mod.__init__ takes (width, id_wid); the
        # id_wid argument was previously missing (TypeError at build)
        self.mod = FPAddStage1Mod(width, id_wid)
        # fix: take the output records from the module's ospec instead of
        # instantiating FPNumBase/Overflow directly -- neither name is
        # imported in this file, so the previous code raised NameError
        out = self.mod.ospec()
        self.out_z = out.z
        self.out_of = out.of
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # fix: module results live on self.mod.o (.of/.z); the attributes
        # .out_of/.out_z referenced before do not exist on FPAddStage1Mod
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module
+from nmigen.cli import main, verilog
+
+from singlepipe import (StageChain, SimpleHandshake,
+ PassThroughStage)
+
+from fpbase import FPState
+from fpcommon.denorm import FPSCData
+from fpcommon.postcalc import FPAddStage1Data
+from fpadd.align import FPAddAlignSingleMod
+from fpadd.add0 import FPAddStage0Mod
+from fpadd.add1 import FPAddStage1Mod
+
+
class FPAddAlignSingleAdd(FPState, SimpleHandshake):
    """ Combined pipeline stage: single-cycle alignment chained with the
        two mantissa-add stages (add0, add1). Acts both as an FSM state
        ("align") and as its own SimpleHandshake pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        SimpleHandshake.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        # input of the chain: special-cases/denorm data
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        # output of the chain: AddStage1's output spec
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ chains align, add0 and add1 modules combinatorially and
            links the chain input to i
        """
        almod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([almod, a0mod, a1mod]).setup(m, i)

        # stage output is the tail of the chain
        self.o = a1mod.o

    def process(self, i):
        return self.o

    def action(self, m):
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumOut, FPNumIn, FPNumBase
+from fpbase import MultiShiftRMerge
+from fpbase import FPState
+from fpcommon.denorm import FPSCData
+
+
class FPNumIn2Ops:
    """ Record carrying a pair of decoded operands (a, b) together with
        the partial result z, the early-out flags and the mux id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assign every field from another FPNumIn2Ops """
        ret = [self.a.eq(i.a), self.b.eq(i.b)]
        ret.append(self.z.eq(i.z))
        ret.append(self.out_do_z.eq(i.out_do_z))
        ret.append(self.oz.eq(i.oz))
        ret.append(self.mid.eq(i.mid))
        return ret
+
+
+
class FPAddAlignMultiMod(FPState):
    """ multi-cycle alignment module: shifts the smaller-exponent operand
        down by ONE position per invocation, asserting exp_eq once the
        exponents match.
    """

    # NOTE(review): FPState.__init__ is never called, so the state name
    # is unset; this class is used purely as a combinatorial module.
    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)  # high when a.e == b.e

    def elaborate(self, platform):
        # This one however (single-cycle) will do the shift
        # in one go.

        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        # it *STAYS* in the align state until exponents match

        # exponent of a greater than b: shift b down
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)   # a.e > b.e
        altb = Signal(reset_less=True)   # a.e < b.e
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
+
+
class FPAddAlignMulti(FPState):
    """ FSM wrapper for FPAddAlignMultiMod: repeats the "align" state,
        latching the one-position-shifted operands each cycle, until the
        module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [mod.in_a.eq(in_a),
                     mod.in_b.eq(in_b),
                     self.exp_eq.eq(mod.exp_eq)]
        m.d.sync += [self.out_a.eq(mod.out_a),
                     self.out_b.eq(mod.out_b)]

    def action(self, m):
        # remain in "align" until the exponents match
        with m.If(self.exp_eq):
            m.next = "add_0"
+
+
class FPAddAlignSingleMod:
    """ single-cycle exponent alignment (combinatorial), using one shared
        variable-width shifter muxed between operand A and operand B.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()   # input: FPSCData
        self.o = self.ospec()   # output: FPNumIn2Ops (aligned a, b)

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        # stage API: outputs are whatever elaborate() drives onto self.o
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent. This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)   # signed exponent shape
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)    # a.e - b.e
        ediffr = Signal(espec, reset_less=True)   # b.e - a.e
        tdiff = Signal(espec, reset_less=True)    # muxed shift amount
        elz = Signal(reset_less=True)             # a.e < b.e
        egz = Signal(reset_less=True)             # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # exponent of a greater than b: shift b down
        with m.If(~self.i.out_do_z):
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                             ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                             ]

        # pass id, partial result and early-out flags through unchanged
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
+
+
class FPAddAlignSingle(FPState):
    """ FSM wrapper for FPAddAlignSingleMod: latches the aligned
        operand pair and proceeds to add_0.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # fix: aligned operands live on self.mod.o (.a/.b);
        # FPAddAlignSingleMod has no out_a/out_b attributes, so the
        # previous code raised AttributeError
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module
+from nmigen.cli import main, verilog
+
+from singlepipe import (ControlBase, SimpleHandshake, PassThroughStage)
+from multipipe import CombMuxOutPipe
+from multipipe import PriorityCombMuxInPipe
+
+from fpcommon.getop import FPADDBaseData
+from fpcommon.denorm import FPSCData
+from fpcommon.pack import FPPackData
+from fpcommon.normtopack import FPNormToPack
+from fpadd.specialcases import FPAddSpecialCasesDeNorm
+from fpadd.addstages import FPAddAlignSingleAdd
+
+from concurrentunit import ReservationStations, num_bits
+
+
class FPADDBasePipe(ControlBase):
    """ 3-stage FPADD pipeline: special-cases/denorm, align+add,
        normalise/round/pack -- connected via ControlBase handshakes.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        stages = [FPAddSpecialCasesDeNorm(width, id_wid),
                  FPAddAlignSingleAdd(width, id_wid),
                  FPNormToPack(width, id_wid)]
        self.pipe1, self.pipe2, self.pipe3 = stages
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
+
+
class FPADDMuxInOut(ReservationStations):
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """
    def __init__(self, width, num_rows):
        self.width = width
        # id width sized to distinguish the mux rows
        self.id_wid = num_bits(width)
        # the ALU must exist before the base class wires up fan-in/out
        self.alu = FPADDBasePipe(width, self.id_wid)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input spec for each reservation-station row """
        return FPADDBaseData(self.width, self.id_wid)

    def o_specfn(self):
        """ output spec for each reservation-station row """
        return FPPackData(self.width, self.id_wid)
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat, Const
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import FPNumDecode
+from singlepipe import SimpleHandshake, StageChain
+
+from fpbase import FPState, FPID
+from fpcommon.getop import FPADDBaseData
+from fpcommon.denorm import (FPSCData, FPAddDeNormMod)
+
+
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add. see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()   # input: FPADDBaseData (raw a, b, mid)
        self.o = self.ospec()   # output: FPSCData (decoded a, b + early-out)

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        # stage API: outputs are whatever elaborate() drives onto self.o
        return self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumDecode(None, self.width)
        b1 = FPNumDecode(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.v.eq(self.i.a),
                     b1.v.eq(self.i.b),
                     self.o.a.eq(a1),
                     self.o.b.eq(b1)
                     ]

        # pre-computed tests, named for readability below
        s_nomatch = Signal(reset_less=True)   # signs differ
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        m_match = Signal(reset_less=True)     # mantissas equal
        m.d.comb += m_match.eq(a1.m == b1.m)

        e_match = Signal(reset_less=True)     # exponents equal
        m.d.comb += e_match.eq(a1.e == b1.e)

        aeqmb = Signal(reset_less=True)       # a == -b exactly
        m.d.comb += aeqmb.eq(s_nomatch & m_match & e_match)

        abz = Signal(reset_less=True)         # both zero
        m.d.comb += abz.eq(a1.is_zero & b1.is_zero)

        abnan = Signal(reset_less=True)       # either NaN
        m.d.comb += abnan.eq(a1.is_nan | b1.is_nan)

        bexp128s = Signal(reset_less=True)    # b at max exponent, signs differ
        m.d.comb += bexp128s.eq(b1.exp_128 & s_nomatch)

        # if a is NaN or b is NaN return NaN
        with m.If(abnan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN (inf + -inf)
            with m.If(bexp128s):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        with m.Elif(abz):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(aeqmb):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)

        # oz mirrors the packed special-case result; mid passes through
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
+
+
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add. see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # fix: FPAddSpecialCasesMod.__init__ takes (width, id_wid); the
        # id_wid argument was previously missing (TypeError at build)
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # fix: FPAddSpecialCasesMod.setup takes (m, i) only -- the extra
        # out_do_z argument previously passed does not exist in its
        # signature -- and the module's results live on self.mod.o
        # (mod.out_z does not exist)
        self.mod.setup(m, i)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)
        # latch the early-out decision so action() can branch on it
        m.d.sync += self.out_do_z.eq(self.mod.o.out_do_z)

    def action(self, m):
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
+
+
class FPAddSpecialCasesDeNorm(FPState, SimpleHandshake):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add. see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width = width
        self.id_wid = id_wid
        SimpleHandshake.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        # SpecialCases input spec
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        # DeNorm output spec
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ chains the special-cases module into the denorm module and
            links the chain input to i
        """
        smod = FPAddSpecialCasesMod(self.width, self.id_wid)
        dmod = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([smod, dmod]).setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = smod.o.out_do_z

        # stage output is the tail of the chain
        self.o = dmod.o

    def process(self, i):
        return self.o

    def action(self, m):
        # for break-out (early-out)
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat, Mux, Array, Const
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import FPOpIn, FPOpOut
+from fpbase import Trigger
+from singlepipe import (StageChain, SimpleHandshake)
+
+from fpbase import FPState, FPID
+from fpcommon.getop import (FPGetOp, FPADDBaseData, FPGet2Op)
+from fpcommon.denorm import (FPSCData, FPAddDeNorm)
+from fpcommon.postcalc import FPAddStage1Data
+from fpcommon.postnormalise import (FPNorm1Data,
+ FPNorm1Single, FPNorm1Multi)
+from fpcommon.roundz import (FPRoundData, FPRound)
+from fpcommon.corrections import FPCorrections
+from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
+from fpcommon.normtopack import FPNormToPack
+from fpcommon.putz import (FPPutZ, FPPutZIdx)
+
+from fpadd.specialcases import (FPAddSpecialCases, FPAddSpecialCasesDeNorm)
+from fpadd.align import (FPAddAlignMulti, FPAddAlignSingle)
+from fpadd.add0 import (FPAddStage0Data, FPAddStage0)
+from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
+from fpadd.addstages import FPAddAlignSingleAdd
+
+
class FPOpData:
    """ Output record for FPADDBase: a handshaked result port z plus
        the multiplexer id.
    """
    def __init__(self, width, id_wid):
        self.z = FPOpOut(width)
        self.z.data_o = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        yield from (self.z, self.mid)

    def eq(self, i):
        """ assign both fields from another FPOpData """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    def ports(self):
        return [self.z, self.mid]
+
+
class FPADDBaseMod:
    """ IEEE754 FP adder "base" module: builds either a compact
        stage-chain (get_compact_fragment) or a stage-per-state FSM
        (get_longer_fragment), then runs every registered FPState
        inside a single FSM in elaborate().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754. supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()     # input-side stb/ack handshake
        self.i = self.ispec()     # input: FPADDBaseData (a, b, mid)
        self.o = self.ospec()     # output: FPOpData (z, mid)

        self.states = []          # FPState objects, one FSM state each

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        # register a state; returned so callers can keep a reference
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered FPState; each state's action()
        # emits its logic plus the next-state transition
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        # one explicitly-instantiated FSM state per pipeline step.
        # NOTE(review): this path references self.in_mid, self.out_z and
        # self.out_mid, none of which are created in __init__ -- it looks
        # stale relative to get_compact_fragment; confirm before using.

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                             self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        # chain: get-ops -> special-cases+denorm -> align+add -> norm/pack
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also registered as an FSM state
        # (NOTE: the loop variable deliberately overwrites sc)
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
+
+
class FPADDBase(FPState):
    """ FSM state wrapping FPADDBaseMod: handles the incoming
        stb/ack handshake (add_stb/add_ack/in_t) and the outgoing
        valid/ready handshake on the result z.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754. supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec is delegated to the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output spec is delegated to the wrapped module """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to its inputs and outputs.

            combinatorially mirrors the input into the module, the
            module's trigger/result back out, and sync-latches the
            incoming strobe.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.valid_o.eq(self.mod.o.z.valid_o),
                     self.mod.o.z.ready_i.eq(self.o.z.ready_i_test),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ready_i.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM behaviour: accept an operand pair (raising in_t.stb to
            start the add), then on completion (z_done) acknowledge and
            transition to "put_z".

            NOTE: an unreachable block of code that followed a bare
            "return" here has been removed (it was dead).
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ready_i.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
+
+
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ        GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754. supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of input (a,b) and result reservation slots
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # rs_sz input slots, each an (a, b) pair of FPOpIn handshakes
        rs = []
        for i in range(rs_sz):
            in_a = FPOpIn(width)
            in_b = FPOpIn(width)
            in_a.data_i = Signal(width)
            in_b.data_i = Signal(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # rs_sz result slots (FPOpOut handshakes)
        res = []
        for i in range(rs_sz):
            out_z = FPOpOut(width)
            out_z.data_o = Signal(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # FSM states, in registration order (see elaborate)
        self.states = []

    def add_state(self, state):
        """ registers an FSM state; returns it for chaining convenience """
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        #m.submodules += self.rs

        # NOTE(review): only reservation slot 0 is wired up here;
        # the remaining rs/res entries are unused by this fragment.
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        # fetch operand a, then operand b, in sequence
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # the adder core, fed from the latched operands
        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # result put-back, indexed by mid, then loop round to "get_a"
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                      o.mid, "get_a"))

        # one FSM state per registered stage; each stage's action()
        # emits the logic and the m.next transitions
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
+
+
if __name__ == "__main__":
    # flip the constant to exercise the bare FPADDBase instead of the
    # full FPADD wrapper.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        # NOTE(review): FPADDBase does not define in_a/in_b/out_z/
        # in_mid/out_mid attributes - this branch looks stale; confirm
        # before enabling.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable
+from math import log
+from operator import or_
+from functools import reduce
+
+from singlepipe import PrevControl, NextControl
+from pipeline import ObjectProxy
+
+
class MultiShiftR:
    """ combinatorial variable right-shifter: o = i >> s.

        s is sized to log2(width) bits, enough to encode any useful
        shift amount for an operand of the given width.
    """

    def __init__(self, width):
        self.width = width
        self.smax = int(log(width) / log(2))        # bits in shift amount
        self.i = Signal(width, reset_less=True)     # value to shift
        self.s = Signal(self.smax, reset_less=True) # shift amount
        self.o = Signal(width, reset_less=True)     # shifted result

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        comb += self.o.eq(self.i >> self.s)
        return m
+
+
class MultiShift:
    """ variable-length shift helper.

        lshift/rshift apply the native variable-shift operator and
        truncate the result back to the width of the input operand.

        NOTE: both methods previously contained an unreachable
        Mux-cascade implementation after an early "return"; that dead
        code has been deleted.  smax remains available for sizing
        shift-amount signals (log2 of the operand width).
    """

    def __init__(self, width):
        self.width = width
        self.smax = int(log(width) / log(2))

    def lshift(self, op, s):
        """ returns op shifted left by s, truncated to len(op) bits """
        res = op << s
        return res[:len(op)]

    def rshift(self, op, s):
        """ returns op shifted right by s, truncated to len(op) bits """
        res = op >> s
        return res[:len(op)]
+
+
class FPNumBase: #(Elaboratable):
    """ Floating-point Base Number Class

        holds the unpacked representation of an IEEE754 number:
        sign (s), signed extended exponent (e), extended mantissa (m),
        plus a latched copy of the packed value (v) and a set of
        combinatorially-derived classification flags (is_nan, is_inf,
        is_zero, etc - driven in elaborate()).
    """
    def __init__(self, width, m_extra=True):
        self.width = width
        m_width = {16: 11, 32: 24, 64: 53}[width] # 1 extra bit (overflow)
        e_width = {16: 7, 32: 10, 64: 13}[width] # 2 extra bits (overflow)
        e_max = 1<<(e_width-3)
        self.rmw = m_width # real mantissa width (not including extras)
        self.e_max = e_max
        if m_extra:
            # mantissa extra bits (top,guard,round)
            self.m_extra = 3
            m_width += self.m_extra
        else:
            self.m_extra = 0
        #print (m_width, e_width, e_max, self.rmw, self.m_extra)
        self.m_width = m_width
        self.e_width = e_width
        self.e_start = self.rmw - 1   # bit position where exponent begins
        self.e_end = self.rmw + self.e_width - 3 # for decoding

        self.v = Signal(width, reset_less=True) # Latched copy of value
        self.m = Signal(m_width, reset_less=True) # Mantissa
        self.e = Signal((e_width, True), reset_less=True) # Exponent: IEEE754exp+2 bits, signed
        self.s = Signal(reset_less=True) # Sign bit

        # useful constants: all-zero / msb-set / all-ones mantissa,
        # and the exponent corner values (biased names for width=32)
        self.mzero = Const(0, (m_width, False))
        m_msb = 1<<(self.m_width-2)
        self.msb1 = Const(m_msb, (m_width, False))
        self.m1s = Const(-1, (m_width, False))
        self.P128 = Const(e_max, (e_width, True))
        self.P127 = Const(e_max-1, (e_width, True))
        self.N127 = Const(-(e_max-1), (e_width, True))
        self.N126 = Const(-(e_max-2), (e_width, True))

        # classification flags, driven combinatorially in elaborate()
        self.is_nan = Signal(reset_less=True)
        self.is_zero = Signal(reset_less=True)
        self.is_inf = Signal(reset_less=True)
        self.is_overflowed = Signal(reset_less=True)
        self.is_denormalised = Signal(reset_less=True)
        self.exp_128 = Signal(reset_less=True)
        self.exp_sub_n126 = Signal((e_width, True), reset_less=True)
        self.exp_lt_n126 = Signal(reset_less=True)
        self.exp_gt_n126 = Signal(reset_less=True)
        self.exp_gt127 = Signal(reset_less=True)
        self.exp_n127 = Signal(reset_less=True)
        self.exp_n126 = Signal(reset_less=True)
        self.m_zero = Signal(reset_less=True)
        self.m_msbzero = Signal(reset_less=True)

    def elaborate(self, platform):
        """ drives all the classification flags from e and m """
        m = Module()
        m.d.comb += self.is_nan.eq(self._is_nan())
        m.d.comb += self.is_zero.eq(self._is_zero())
        m.d.comb += self.is_inf.eq(self._is_inf())
        m.d.comb += self.is_overflowed.eq(self._is_overflowed())
        m.d.comb += self.is_denormalised.eq(self._is_denormalised())
        m.d.comb += self.exp_128.eq(self.e == self.P128)
        m.d.comb += self.exp_sub_n126.eq(self.e - self.N126)
        m.d.comb += self.exp_gt_n126.eq(self.exp_sub_n126 > 0)
        m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0)
        m.d.comb += self.exp_gt127.eq(self.e > self.P127)
        m.d.comb += self.exp_n127.eq(self.e == self.N127)
        m.d.comb += self.exp_n126.eq(self.e == self.N126)
        m.d.comb += self.m_zero.eq(self.m == self.mzero)
        m.d.comb += self.m_msbzero.eq(self.m[self.e_start] == 0)

        return m

    def _is_nan(self):
        # max exponent with non-zero mantissa
        return (self.exp_128) & (~self.m_zero)

    def _is_inf(self):
        # max exponent with zero mantissa
        return (self.exp_128) & (self.m_zero)

    def _is_zero(self):
        # minimum exponent with zero mantissa
        return (self.exp_n127) & (self.m_zero)

    def _is_overflowed(self):
        return self.exp_gt127

    def _is_denormalised(self):
        return (self.exp_n126) & (self.m_msbzero)

    def __iter__(self):
        yield self.s
        yield self.e
        yield self.m

    def eq(self, inp):
        """ copies sign/exponent/mantissa (not v, not flags) """
        return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)]
+
+
class FPNumOut(FPNumBase):
    """ Floating-point Number Class

        Contains signals for an incoming copy of the value, decoded into
        sign / exponent / mantissa.
        Also contains encoding functions, creation and recognition of
        zero, NaN and inf (all signed)

        Four extra bits are included in the mantissa: the top bit
        (m[-1]) is effectively a carry-overflow.  The other three are
        guard (m[2]), round (m[1]), and sticky (m[0])
    """
    def __init__(self, width, m_extra=True):
        FPNumBase.__init__(self, width, m_extra)

    def elaborate(self, platform):
        # no extra logic beyond the base-class classification flags
        m = FPNumBase.elaborate(self, platform)

        return m

    def create(self, s, e, m):
        """ creates a value from sign / exponent / mantissa

            bias is added here, to the exponent.
            returns a list of assignments to self.v (statement form).
        """
        return [
          self.v[-1].eq(s),          # sign
          self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias)
          self.v[0:self.e_start].eq(m)         # mantissa
        ]

    def nan(self, s):
        # quiet NaN: max exponent, msb of mantissa set
        return self.create(s, self.P128, 1<<(self.e_start-1))

    def inf(self, s):
        # infinity: max exponent, zero mantissa
        return self.create(s, self.P128, 0)

    def zero(self, s):
        # signed zero: minimum exponent, zero mantissa
        return self.create(s, self.N127, 0)

    def create2(self, s, e, m):
        """ creates a value from sign / exponent / mantissa

            bias is added here, to the exponent.
            expression form of create(): returns a Cat() value instead
            of a list of assignments.
        """
        e = e + self.P127 # exp (add on bias)
        return Cat(m[0:self.e_start],
                   e[0:self.e_end-self.e_start],
                   s)

    def nan2(self, s):
        return self.create2(s, self.P128, self.msb1)

    def inf2(self, s):
        return self.create2(s, self.P128, self.mzero)

    def zero2(self, s):
        return self.create2(s, self.N127, self.mzero)
+
+
class MultiShiftRMerge(Elaboratable):
    """ shifts down (right) and merges lower bits into m[0].
        m[0] is the "sticky" bit, basically

        inputs:  inp (value), diff (shift amount)
        output:  m = (inp[1:] >> clamped-diff) with all shifted-out
                 bits (and inp[0]) OR-reduced into bit 0.
    """
    def __init__(self, width, s_max=None):
        if s_max is None:
            s_max = int(log(width) / log(2))
        self.smax = s_max                           # shift-amount width
        self.m = Signal(width, reset_less=True)     # merged result
        self.inp = Signal(width, reset_less=True)   # input mantissa
        self.diff = Signal(s_max, reset_less=True)  # requested shift
        self.width = width

    def elaborate(self, platform):
        m = Module()

        rs = Signal(self.width, reset_less=True)
        m_mask = Signal(self.width, reset_less=True)
        smask = Signal(self.width, reset_less=True)
        stickybit = Signal(reset_less=True)
        maxslen = Signal(self.smax, reset_less=True)
        maxsleni = Signal(self.smax, reset_less=True)

        sm = MultiShift(self.width-1)
        m0s = Const(0, self.width-1)
        mw = Const(self.width-1, len(self.diff))
        # clamp the shift to width-1: shifting further produces zero
        # anyway, and maxsleni is the complementary (inverse) amount
        # used to build the mask of bits that fall off the bottom
        m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)),
                     maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)),
                    ]

        m.d.comb += [
                # shift mantissa by maxslen, mask by inverse
                rs.eq(sm.rshift(self.inp[1:], maxslen)),
                m_mask.eq(sm.rshift(~m0s, maxsleni)),
                smask.eq(self.inp[1:] & m_mask),
                # sticky bit combines all mask (and mantissa low bit)
                stickybit.eq(smask.bool() | self.inp[0]),
                # mantissa result contains m[0] already.
                self.m.eq(Cat(stickybit, rs))
             ]
        return m
+
+
class FPNumShift(FPNumBase, Elaboratable):
    """ Floating-point Number Class for shifting

        mirrors a source operand (op) and, in the owning FSM's "align"
        state, shifts this number down while its exponent is smaller
        than that of the other operand (inv).
    """
    def __init__(self, mainm, op, inv, width, m_extra=True):
        FPNumBase.__init__(self, width, m_extra)
        self.latch_in = Signal()
        self.mainm = mainm  # owning module/FSM: provides State()
        self.inv = inv      # the operand compared against
        self.op = op        # source operand mirrored into this one

    def elaborate(self, platform):
        m = FPNumBase.elaborate(self, platform)

        # bugfix: these read "op.s" / "op.e" / "op.m" - "op" is not in
        # scope inside elaborate(); the constructor stored it as self.op
        m.d.comb += self.s.eq(self.op.s)
        m.d.comb += self.e.eq(self.op.e)
        m.d.comb += self.m.eq(self.op.m)

        with self.mainm.State("align"):
            with m.If(self.e < self.inv.e):
                # bugfix: shift_down() requires the number to shift as
                # its argument (was called with none) - shift self.
                m.d.sync += self.shift_down(self)

        return m

    def shift_down(self, inp):
        """ shifts a mantissa down by one. exponent is increased to compensate

            accuracy is lost as a result in the mantissa however there are 3
            guard bits (the latter of which is the "sticky" bit)
        """
        return [self.e.eq(inp.e + 1),
                self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
               ]

    def shift_down_multi(self, diff):
        """ shifts a mantissa down. exponent is increased to compensate

            accuracy is lost as a result in the mantissa however there are 3
            guard bits (the latter of which is the "sticky" bit)

            this code works by variable-shifting the mantissa by up to
            its maximum bit-length: no point doing more (it'll still be
            zero).

            the sticky bit is computed by shifting a batch of 1s by
            the same amount, which will introduce zeros.  it's then
            inverted and used as a mask to get the LSBs of the mantissa.
            those are then |'d into the sticky bit.
        """
        sm = MultiShift(self.width)
        mw = Const(self.m_width-1, len(diff))
        maxslen = Mux(diff > mw, mw, diff)
        rs = sm.rshift(self.m[1:], maxslen)
        maxsleni = mw - maxslen
        m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert

        # consistency fix: use .bool() OR-reduction as FPNumIn does
        # (reduce(or_, value) iterates Value bits less portably)
        stickybits = (self.m[1:] & m_mask).bool() | self.m[0]
        return [self.e.eq(self.e + diff),
                self.m.eq(Cat(stickybits, rs))
               ]

    def shift_up_multi(self, diff):
        """ shifts a mantissa up. exponent is decreased to compensate
        """
        sm = MultiShift(self.width)
        mw = Const(self.m_width, len(diff))
        maxslen = Mux(diff > mw, mw, diff)

        return [self.e.eq(self.e - diff),
                self.m.eq(sm.lshift(self.m, maxslen))
               ]
+
+
class FPNumDecode(FPNumBase):
    """ Floating-point Number Class

        Contains signals for an incoming copy of the value, decoded into
        sign / exponent / mantissa.
        Also contains encoding functions, creation and recognition of
        zero, NaN and inf (all signed)

        Four extra bits are included in the mantissa: the top bit
        (m[-1]) is effectively a carry-overflow.  The other three are
        guard (m[2]), round (m[1]), and sticky (m[0])
    """
    def __init__(self, op, width, m_extra=True):
        FPNumBase.__init__(self, width, m_extra)
        self.op = op  # NOTE(review): stored but not used in this class

    def elaborate(self, platform):
        m = FPNumBase.elaborate(self, platform)

        # combinatorially decode the latched packed value
        m.d.comb += self.decode(self.v)

        return m

    def decode(self, v):
        """ decodes a latched value into sign / exponent / mantissa

            bias is subtracted here, from the exponent.  exponent
            is extended to 10 bits so that subtract 127 is done on
            a 10-bit number
        """
        args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
        #print ("decode", self.e_end)
        return [self.m.eq(Cat(*args)), # mantissa
                self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
                self.s.eq(v[-1]),                 # sign
                ]
+
class FPNumIn(FPNumBase):
    """ Floating-point Number Class

        Contains signals for an incoming copy of the value, decoded into
        sign / exponent / mantissa.
        Also contains encoding functions, creation and recognition of
        zero, NaN and inf (all signed)

        Four extra bits are included in the mantissa: the top bit
        (m[-1]) is effectively a carry-overflow.  The other three are
        guard (m[2]), round (m[1]), and sticky (m[0])
    """
    def __init__(self, op, width, m_extra=True):
        FPNumBase.__init__(self, width, m_extra)
        self.latch_in = Signal()
        self.op = op

    def decode2(self, m):
        """ decodes a latched value into sign / exponent / mantissa

            bias is subtracted here, from the exponent.  exponent
            is extended to 10 bits so that subtract 127 is done on
            a 10-bit number

            expression form: returns an ObjectProxy carrying m/e/s
            rather than emitting assignments (compare decode()).
        """
        v = self.v
        args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
        #print ("decode", self.e_end)
        res = ObjectProxy(m, pipemode=False)
        res.m = Cat(*args)                             # mantissa
        res.e = v[self.e_start:self.e_end] - self.P127 # exp
        res.s = v[-1]                                  # sign
        return res

    def decode(self, v):
        """ decodes a latched value into sign / exponent / mantissa

            bias is subtracted here, from the exponent.  exponent
            is extended to 10 bits so that subtract 127 is done on
            a 10-bit number
        """
        args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
        #print ("decode", self.e_end)
        return [self.m.eq(Cat(*args)), # mantissa
                self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
                self.s.eq(v[-1]),                 # sign
                ]

    def shift_down(self, inp):
        """ shifts a mantissa down by one. exponent is increased to compensate

            accuracy is lost as a result in the mantissa however there are 3
            guard bits (the latter of which is the "sticky" bit)
        """
        return [self.e.eq(inp.e + 1),
                self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
               ]

    def shift_down_multi(self, diff, inp=None):
        """ shifts a mantissa down. exponent is increased to compensate

            accuracy is lost as a result in the mantissa however there are 3
            guard bits (the latter of which is the "sticky" bit)

            this code works by variable-shifting the mantissa by up to
            its maximum bit-length: no point doing more (it'll still be
            zero).

            the sticky bit is computed by shifting a batch of 1s by
            the same amount, which will introduce zeros.  it's then
            inverted and used as a mask to get the LSBs of the mantissa.
            those are then |'d into the sticky bit.

            if inp is None the shift source defaults to self.
        """
        if inp is None:
            inp = self
        sm = MultiShift(self.width)
        mw = Const(self.m_width-1, len(diff))
        maxslen = Mux(diff > mw, mw, diff)
        rs = sm.rshift(inp.m[1:], maxslen)
        maxsleni = mw - maxslen
        m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert

        #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0]
        stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0]
        return [self.e.eq(inp.e + diff),
                self.m.eq(Cat(stickybit, rs))
               ]

    def shift_up_multi(self, diff):
        """ shifts a mantissa up. exponent is decreased to compensate
        """
        sm = MultiShift(self.width)
        mw = Const(self.m_width, len(diff))
        maxslen = Mux(diff > mw, mw, diff)

        return [self.e.eq(self.e - diff),
                self.m.eq(sm.lshift(self.m, maxslen))
               ]
+
class Trigger(Elaboratable):
    """ stb/ack handshake pair; trigger pulses while both are high. """

    def __init__(self):

        self.stb = Signal(reset=0)               # strobe (request)
        self.ack = Signal()                      # acknowledge
        self.trigger = Signal(reset_less=True)   # stb AND ack

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        comb += self.trigger.eq(self.stb & self.ack)
        return m

    def eq(self, inp):
        """ copies stb/ack from another Trigger (statement form) """
        return [self.stb.eq(inp.stb), self.ack.eq(inp.ack)]

    def ports(self):
        return [self.stb, self.ack]
+
+
class FPOpIn(PrevControl):
    """ FP operand input: a PrevControl handshake whose data payload
        is exposed under the legacy name "v".
    """
    def __init__(self, width):
        PrevControl.__init__(self)
        self.width = width

    @property
    def v(self):
        return self.data_i

    def _chain(self, in_op, extra, ack):
        """ shared body of chain_inv/chain_from: copy value and stb
            (stb optionally gated by extra), drive the upstream ack.
        """
        stb = in_op.stb if extra is None else (in_op.stb & extra)
        return [self.v.eq(in_op.v),   # receive value
                self.stb.eq(stb),     # receive STB
                in_op.ack.eq(ack),    # send ACK
               ]

    def chain_inv(self, in_op, extra=None):
        """ chain from in_op, sending an *inverted* ack upstream """
        return self._chain(in_op, extra, ~self.ack)

    def chain_from(self, in_op, extra=None):
        """ chain from in_op, forwarding ack upstream unchanged """
        return self._chain(in_op, extra, self.ack)
+
+
class FPOpOut(NextControl):
    """ FP operand output: a NextControl handshake whose data payload
        is exposed under the legacy name "v".
    """
    def __init__(self, width):
        NextControl.__init__(self)
        self.width = width

    @property
    def v(self):
        return self.data_o

    def _chain(self, in_op, extra, ack):
        """ shared body of chain_inv/chain_from: copy value and stb
            (stb optionally gated by extra), drive the upstream ack.
        """
        stb = in_op.stb if extra is None else (in_op.stb & extra)
        return [self.v.eq(in_op.v),   # receive value
                self.stb.eq(stb),     # receive STB
                in_op.ack.eq(ack),    # send ACK
               ]

    def chain_inv(self, in_op, extra=None):
        """ chain from in_op, sending an *inverted* ack upstream """
        return self._chain(in_op, extra, ~self.ack)

    def chain_from(self, in_op, extra=None):
        """ chain from in_op, forwarding ack upstream unchanged """
        return self._chain(in_op, extra, self.ack)
+
+
class Overflow: #(Elaboratable):
    """ rounding flags: guard/round/sticky plus the mantissa LSB (m0),
        and the derived round-up decision (roundz).
    """

    # the four latched flag names, in copy/iteration order
    _fields = ("guard", "round_bit", "sticky", "m0")

    def __init__(self):
        self.guard = Signal(reset_less=True)     # tot[2]
        self.round_bit = Signal(reset_less=True) # tot[1]
        self.sticky = Signal(reset_less=True)    # tot[0]
        self.m0 = Signal(reset_less=True)        # mantissa zero bit

        self.roundz = Signal(reset_less=True)

    def __iter__(self):
        for name in self._fields:
            yield getattr(self, name)

    def eq(self, inp):
        return [getattr(self, name).eq(getattr(inp, name))
                for name in self._fields]

    def elaborate(self, platform):
        m = Module()
        # round up when guard is set and any lower bit survives
        low_bits = self.round_bit | self.sticky | self.m0
        m.d.comb += self.roundz.eq(self.guard & low_bits)
        return m
+
+
class FPBase:
    """ IEEE754 Floating Point Base Class

        contains common functions for FP manipulation, such as
        extracting and packing operands, normalisation, denormalisation,
        rounding etc.

        these are FSM building-blocks: each emits comb/sync statements
        into the caller's Module and sets m.next to advance the FSM.
    """

    def get_op(self, m, op, v, next_state):
        """ this function moves to the next state and copies the operand
            when both stb and ack are 1.
            acknowledgement is sent by setting ack to ZERO.
        """
        res = v.decode2(m)
        ack = Signal()
        with m.If((op.ready_o) & (op.valid_i_test)):
            m.next = next_state
            # op is latched in from FPNumIn class on same ack/stb
            m.d.comb += ack.eq(0)
        with m.Else():
            m.d.comb += ack.eq(1)
        return [res, ack]

    def denormalise(self, m, a):
        """ denormalises a number.  this is probably the wrong name for
            this function.  for normalised numbers (exponent != minimum)
            one *extra* bit (the implicit 1) is added *back in*.
            for denormalised numbers, the mantissa is left alone
            and the exponent increased by 1.

            both cases *effectively multiply the number stored by 2*,
            which has to be taken into account when extracting the result.
        """
        with m.If(a.exp_n127):
            m.d.sync += a.e.eq(a.N126) # limit a exponent
        with m.Else():
            m.d.sync += a.m[-1].eq(1) # set top mantissa bit

    def op_normalise(self, m, op, next_state):
        """ operand normalisation
            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
        """
        with m.If((op.m[-1] == 0)): # check last bit of mantissa
            m.d.sync +=[
                op.e.eq(op.e - 1),  # DECREASE exponent
                op.m.eq(op.m << 1), # shift mantissa UP
            ]
        with m.Else():
            m.next = next_state

    def normalise_1(self, m, z, of, next_state):
        """ first stage normalisation

            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
                  the extra mantissa bits coming from tot[0..2]
        """
        # shift up while the top bit is clear and the exponent allows
        with m.If((z.m[-1] == 0) & (z.e > z.N126)):
            m.d.sync += [
                z.e.eq(z.e - 1),  # DECREASE exponent
                z.m.eq(z.m << 1), # shift mantissa UP
                z.m[0].eq(of.guard),       # steal guard bit (was tot[2])
                of.guard.eq(of.round_bit), # steal round_bit (was tot[1])
                of.round_bit.eq(0),        # reset round bit
                of.m0.eq(of.guard),
            ]
        with m.Else():
            m.next = next_state

    def normalise_2(self, m, z, of, next_state):
        """ second stage normalisation

            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
                  the extra mantissa bits coming from tot[0..2]
        """
        # shift down while the exponent is below the minimum
        with m.If(z.e < z.N126):
            m.d.sync +=[
                z.e.eq(z.e + 1),  # INCREASE exponent
                z.m.eq(z.m >> 1), # shift mantissa DOWN
                of.guard.eq(z.m[0]),
                of.m0.eq(z.m[1]),
                of.round_bit.eq(of.guard),
                of.sticky.eq(of.sticky | of.round_bit)
            ]
        with m.Else():
            m.next = next_state

    def roundz(self, m, z, roundz):
        """ performs rounding on the output. TODO: different kinds of rounding
        """
        with m.If(roundz):
            m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up
            with m.If(z.m == z.m1s): # all 1s
                m.d.sync += z.e.eq(z.e + 1) # exponent rounds up

    def corrections(self, m, z, next_state):
        """ denormalisation and sign-bug corrections
        """
        m.next = next_state
        # denormalised, correct exponent to zero
        with m.If(z.is_denormalised):
            m.d.sync += z.e.eq(z.N127)

    def pack(self, m, z, next_state):
        """ packs the result into the output (detects overflow->Inf)
        """
        m.next = next_state
        # if overflow occurs, return inf
        with m.If(z.is_overflowed):
            m.d.sync += z.inf(z.s)
        with m.Else():
            m.d.sync += z.create(z.s, z.e, z.m)

    def put_z(self, m, z, out_z, next_state):
        """ put_z: stores the result in the output.  raises stb and waits
            for ack to be set to 1 before moving to the next state.
            resets stb back to zero when that occurs, as acknowledgement.
        """
        m.d.sync += [
          out_z.v.eq(z.v)
        ]
        with m.If(out_z.valid_o & out_z.ready_i_test):
            m.d.sync += out_z.valid_o.eq(0)
            m.next = next_state
        with m.Else():
            m.d.sync += out_z.valid_o.eq(1)
+
+
class FPState(FPBase):
    """ base class for FSM states: records the state name and exposes
        each entry of the input/output dicts as a named attribute.
    """
    def __init__(self, state_from):
        self.state_from = state_from

    def _bind(self, mapping):
        # expose each dict entry as an attribute of this state
        for name, value in mapping.items():
            setattr(self, name, value)

    def set_inputs(self, inputs):
        self.inputs = inputs
        self._bind(inputs)

    def set_outputs(self, outputs):
        self.outputs = outputs
        self._bind(outputs)
+
+
class FPID:
    """ carries a "mid" (muxid) alongside a pipeline stage.

        id_wid may be None or 0 to disable id-tracking, in which case
        in_mid/out_mid are None and idsync() is a no-op.
    """
    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        # bugfix/consistency: __init__ creates the signals only when
        # id_wid is *truthy*, so test the same condition here.  the old
        # "is not None" guard attempted a sync assignment to a None
        # signal when id_wid == 0.
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Elaboratable
+from nmigen.cli import main, verilog
+from fpbase import FPState
+from fpcommon.roundz import FPRoundData
+
+
class FPCorrectionsMod(Elaboratable):
    """ combinatorial corrections stage: if the result is denormalised,
        forces its exponent to the minimum (N127).  passed through
        unchanged when the special-cases bypass (out_do_z) is active.

        NOTE(review): the output is named out_z here, whereas sibling
        stage modules name theirs "o" - callers must use out_z.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            # denormalised result: clamp exponent to the minimum
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
+
+
class FPCorrections(FPState):
    """ FSM-state wrapper around FPCorrectionsMod: sync-latches the
        module's combinatorial output and advances to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # bugfix: FPCorrectionsMod requires id_wid as well as width
        # (the old single-argument call raised TypeError)
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # bugfix: the module's output attribute is out_z, not "o"
        # (assumes FPRoundData carries a mid field - as eq() copies one)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import FPNumIn, FPNumOut, FPNumBase
+from fpbase import FPState
+
+
class FPSCData:
    """ data record passed out of the special-cases stage: the two
        decoded operands (a, b), a pre-computed result (z / packed oz)
        with its bypass flag (out_do_z), and the muxid (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        for num in (self.a, self.b, self.z):
            yield from num
        yield self.oz
        yield self.out_do_z
        yield self.mid

    def eq(self, i):
        stmts = [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z)]
        stmts.append(self.oz.eq(i.oz))
        stmts += [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
        return stmts
+
+
class FPAddDeNormMod(FPState):
    """ combinatorial denormalisation of both operands: for a minimum
        exponent the exponent is clamped to N126, otherwise the
        implicit top mantissa bit is set.  everything else (z, oz,
        out_do_z, mid) is passed straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for each of the two operands
            for in_op, out_op in ((self.i.a, self.o.a),
                                  (self.i.b, self.o.b)):
                m.d.comb += out_op.eq(in_op)
                with m.If(in_op.exp_n127):
                    m.d.comb += out_op.e.eq(in_op.N126) # limit exponent
                with m.Else():
                    m.d.comb += out_op.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
+
+
class FPAddDeNorm(FPState):
    """ FSM-state wrapper around FPAddDeNormMod: sync-latches the
        denormalised operands and advances to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # bugfix: FPAddDeNormMod requires id_wid as well as width
        # (the old single-argument call raised TypeError)
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # bugfix: the module's denormalised operands live on its output
        # spec (mod.o.a / mod.o.b) - it has no out_a/out_b attributes
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat, Mux, Array, Const, Elaboratable
+from nmigen.lib.coding import PriorityEncoder
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import FPNumIn, FPNumOut, FPOpIn, Overflow, FPBase, FPNumBase
+from fpbase import MultiShiftRMerge, Trigger
+from singlepipe import (ControlBase, StageChain, SimpleHandshake,
+ PassThroughStage, PrevControl)
+from multipipe import CombMuxOutPipe
+from multipipe import PriorityCombMuxInPipe
+
+from fpbase import FPState
+import nmoperator
+
+
class FPGetOpMod(Elaboratable):
    """ latches an operand from an FPOpIn when its handshake fires,
        raising out_decode while ready and valid coincide.
    """
    def __init__(self, width):
        self.in_op = FPOpIn(width)
        self.in_op.data_i = Signal(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        ready = self.in_op.ready_o
        valid = self.in_op.valid_i_test
        comb += self.out_decode.eq(ready & valid)
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            comb += self.out_op.eq(self.in_op.v)
        return m
+
+
class FPGetOp(FPState):
    """ gets operand

        FSM state wrapping FPGetOpMod: on handshake (out_decode) it
        latches the operand, drops ready, and moves to out_state;
        otherwise it keeps ready raised.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state      # FSM state to move to on receipt
        self.mod = FPGetOpMod(width)
        self.in_op = in_op              # external FPOpIn handshake
        self.out_op = Signal(width)     # latched operand
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        # submodule is registered under this state's name
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += nmoperator.eq(self.mod.in_op, in_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
              self.in_op.ready_o.eq(0), # acknowledge receipt
              self.out_op.eq(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ready_o.eq(1)
+
+
+class FPNumBase2Ops:
+    """ data record: two decoded FP operands (a, b) plus a muxid (mid).
+    """
+
+    def __init__(self, width, id_wid, m_extra=True):
+        self.a = FPNumBase(width, m_extra)
+        self.b = FPNumBase(width, m_extra)
+        self.mid = Signal(id_wid, reset_less=True)
+
+    def eq(self, i):
+        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
+
+    def ports(self):
+        return [self.a, self.b, self.mid]
+
+
+class FPADDBaseData:
+    """ input data record for the FP adder: raw (undecoded) a/b
+        operand bit-vectors plus a muxid (mid) for fan-in routing.
+    """
+
+    def __init__(self, width, id_wid):
+        self.width = width
+        self.id_wid = id_wid
+        self.a = Signal(width)
+        self.b = Signal(width)
+        self.mid = Signal(id_wid, reset_less=True)
+
+    def eq(self, i):
+        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
+
+    def ports(self):
+        return [self.a, self.b, self.mid]
+
+
+class FPGet2OpMod(PrevControl):
+    """ PrevControl-based receiver for a pair of FP operands.
+
+        When the ready/valid trigger fires, the incoming
+        FPADDBaseData is combinatorially copied to the output.
+    """
+    def __init__(self, width, id_wid):
+        PrevControl.__init__(self)
+        self.width = width
+        self.id_wid = id_wid
+        self.data_i = self.ispec()
+        # alias: i is the conventional short name for data_i
+        self.i = self.data_i
+        self.o = self.ospec()
+
+    def ispec(self):
+        return FPADDBaseData(self.width, self.id_wid)
+
+    def ospec(self):
+        return FPADDBaseData(self.width, self.id_wid)
+
+    def process(self, i):
+        return self.o
+
+    def elaborate(self, platform):
+        m = PrevControl.elaborate(self, platform)
+        with m.If(self.trigger):
+            m.d.comb += [
+                self.o.eq(self.data_i),
+            ]
+        return m
+
+
+class FPGet2Op(FPState):
+    """ gets operands
+
+        FSM state wrapper around FPGet2OpMod: waits for the
+        ready/valid trigger, latches both operands, then moves
+        to out_state.
+    """
+
+    def __init__(self, in_state, out_state, width, id_wid):
+        FPState.__init__(self, in_state)
+        self.out_state = out_state
+        self.mod = FPGet2OpMod(width, id_wid)
+        self.o = self.ospec()
+        self.in_stb = Signal(reset_less=True)
+        self.out_ack = Signal(reset_less=True)
+        self.out_decode = Signal(reset_less=True)
+
+    def ispec(self):
+        return self.mod.ispec()
+
+    def ospec(self):
+        return self.mod.ospec()
+
+    def trigger_setup(self, m, in_stb, in_ack):
+        """ links stb/ack
+        """
+        m.d.comb += self.mod.valid_i.eq(in_stb)
+        m.d.comb += in_ack.eq(self.mod.ready_o)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        m.submodules.get_ops = self.mod
+        m.d.comb += self.mod.i.eq(i)
+        m.d.comb += self.out_ack.eq(self.mod.ready_o)
+        m.d.comb += self.out_decode.eq(self.mod.trigger)
+
+    def process(self, i):
+        return self.o
+
+    def action(self, m):
+        with m.If(self.out_decode):
+            m.next = self.out_state
+            # operands accepted: drop ready, latch the pair
+            m.d.sync += [
+                self.mod.ready_o.eq(0),
+                self.o.eq(self.mod.o),
+            ]
+        with m.Else():
+            m.d.sync += self.mod.ready_o.eq(1)
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+#from nmigen.cli import main, verilog
+
+from singlepipe import StageChain, SimpleHandshake
+
+from fpbase import FPState, FPID
+from fpcommon.postcalc import FPAddStage1Data
+from fpcommon.postnormalise import FPNorm1ModSingle
+from fpcommon.roundz import FPRoundMod
+from fpcommon.corrections import FPCorrectionsMod
+from fpcommon.pack import FPPackData, FPPackMod
+
+
+class FPNormToPack(FPState, SimpleHandshake):
+    """ chains normalisation -> rounding -> corrections -> pack into
+        one SimpleHandshake pipeline stage; also usable as the FSM's
+        "normalise_1" state.
+    """
+
+    def __init__(self, width, id_wid):
+        FPState.__init__(self, "normalise_1")
+        self.id_wid = id_wid
+        self.width = width
+        SimpleHandshake.__init__(self, self) # pipeline is its own stage
+
+    def ispec(self):
+        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
+
+    def ospec(self):
+        return FPPackData(self.width, self.id_wid) # FPPackMod ospec
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+
+        # Normalisation, Rounding Corrections, Pack - in a chain
+        nmod = FPNorm1ModSingle(self.width, self.id_wid)
+        rmod = FPRoundMod(self.width, self.id_wid)
+        cmod = FPCorrectionsMod(self.width, self.id_wid)
+        pmod = FPPackMod(self.width, self.id_wid)
+        stages = [nmod, rmod, cmod, pmod]
+        chain = StageChain(stages)
+        chain.setup(m, i)
+        self.out_z = pmod.ospec()
+
+        # expose the last stage's output as this stage's output
+        self.o = pmod.o
+
+    def process(self, i):
+        return self.o
+
+    def action(self, m):
+        # FSM mode: register the chain's result and hand over to put_z
+        m.d.sync += self.out_z.eq(self.process(None))
+        m.next = "pack_put_z"
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumOut
+from fpbase import FPState
+from fpcommon.roundz import FPRoundData
+from singlepipe import Object
+
+
+class FPPackData(Object):
+    """ final output record: packed IEEE754 result (z) plus muxid (mid).
+    """
+
+    def __init__(self, width, id_wid):
+        Object.__init__(self)
+        # z: the fully-packed IEEE754 result bit-vector
+        self.z = Signal(width, reset_less=True)
+        # mid: muxid, routes the result back to its Reservation Station
+        self.mid = Signal(id_wid, reset_less=True)
+
+
+class FPPackMod(Elaboratable):
+    """ combinatorial pack stage: assembles sign/exponent/mantissa into
+        the final IEEE754 word, or passes through the special-case
+        result (oz) when out_do_z is set.
+    """
+
+    def __init__(self, width, id_wid):
+        self.width = width
+        self.id_wid = id_wid
+        self.i = self.ispec()
+        self.o = self.ospec()
+
+    def ispec(self):
+        return FPRoundData(self.width, self.id_wid)
+
+    def ospec(self):
+        return FPPackData(self.width, self.id_wid)
+
+    def process(self, i):
+        return self.o
+
+    def setup(self, m, in_z):
+        """ links module to inputs and outputs
+        """
+        m.submodules.pack = self
+        m.d.comb += self.i.eq(in_z)
+
+    def elaborate(self, platform):
+        m = Module()
+        z = FPNumOut(self.width, False)
+        m.submodules.pack_in_z = self.i.z
+        m.submodules.pack_out_z = z
+        m.d.comb += self.o.mid.eq(self.i.mid)
+        with m.If(~self.i.out_do_z):
+            # normal path: saturate to infinity on exponent overflow
+            with m.If(self.i.z.is_overflowed):
+                m.d.comb += z.inf(self.i.z.s)
+            with m.Else():
+                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
+        with m.Else():
+            # special-case path: result was decided earlier (oz)
+            m.d.comb += z.v.eq(self.i.oz)
+        m.d.comb += self.o.z.eq(z.v)
+        return m
+
+
+class FPPack(FPState):
+    """ FSM state: packs the rounded result into final IEEE754 form.
+
+        Registers FPPackMod's output into out_z, then transitions
+        to "pack_put_z".
+    """
+
+    def __init__(self, width, id_wid):
+        FPState.__init__(self, "pack")
+        # fix: FPPackMod.__init__ requires both width and id_wid
+        self.mod = FPPackMod(width, id_wid)
+        self.out_z = self.ospec()
+
+    def ispec(self):
+        return self.mod.ispec()
+
+    def ospec(self):
+        return self.mod.ospec()
+
+    def setup(self, m, in_z):
+        """ links module to inputs and outputs
+        """
+        self.mod.setup(m, in_z)
+
+        # fix: FPPackData's fields are z/mid (there is no .v), and
+        # FPPackMod's output record is self.mod.o (no .out_z attribute)
+        m.d.sync += self.out_z.z.eq(self.mod.o.z)
+        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
+
+    def action(self, m):
+        m.next = "pack_put_z"
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Signal
+from fpbase import Overflow, FPNumBase
+
+class FPAddStage1Data:
+    """ intermediate record between add stage 1 and post-normalisation:
+        working result (z), special-case bypass (out_do_z/oz),
+        guard/round/sticky state (of) and muxid (mid).
+    """
+
+    def __init__(self, width, id_wid):
+        self.z = FPNumBase(width, False)
+        self.out_do_z = Signal(reset_less=True)
+        self.oz = Signal(width, reset_less=True)
+        self.of = Overflow()
+        self.mid = Signal(id_wid, reset_less=True)
+
+    def __iter__(self):
+        yield from self.z
+        yield self.out_do_z
+        yield self.oz
+        yield from self.of
+        yield self.mid
+
+    def eq(self, i):
+        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
+                self.of.eq(i.of), self.mid.eq(i.mid)]
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat, Mux, Elaboratable
+from nmigen.lib.coding import PriorityEncoder
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import Overflow, FPNumBase
+from fpbase import MultiShiftRMerge
+from fpbase import FPState
+from .postcalc import FPAddStage1Data
+
+
+class FPNorm1Data:
+    """ output record of post-normalisation: normalised result (z),
+        round-up decision (roundz), special-case bypass (out_do_z/oz)
+        and muxid (mid).
+    """
+
+    def __init__(self, width, id_wid):
+        self.roundz = Signal(reset_less=True)
+        self.z = FPNumBase(width, False)
+        self.out_do_z = Signal(reset_less=True)
+        self.oz = Signal(width, reset_less=True)
+        self.mid = Signal(id_wid, reset_less=True)
+
+    def eq(self, i):
+        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
+                self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
+
+
+class FPNorm1ModSingle(Elaboratable):
+    """ single-cycle post-normalisation.
+
+        Either decreases the exponent (mantissa MSB is zero: shift the
+        mantissa up by the leading-zero count, found with a reversed-input
+        PriorityEncoder) or increases it (exponent below minimum: shift
+        the mantissa down with MultiShiftRMerge, merging shifted-out bits
+        into sticky).  Special-case results (out_do_z) bypass untouched.
+    """
+
+    def __init__(self, width, id_wid):
+        self.width = width
+        self.id_wid = id_wid
+        self.i = self.ispec()
+        self.o = self.ospec()
+
+    def ispec(self):
+        return FPAddStage1Data(self.width, self.id_wid)
+
+    def ospec(self):
+        return FPNorm1Data(self.width, self.id_wid)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        m.submodules.normalise_1 = self
+        m.d.comb += self.i.eq(i)
+
+    def process(self, i):
+        return self.o
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # mantissa width plus guard and round bits
+        mwid = self.o.z.m_width+2
+        pe = PriorityEncoder(mwid)
+        m.submodules.norm_pe = pe
+
+        of = Overflow()
+        m.d.comb += self.o.roundz.eq(of.roundz)
+
+        m.submodules.norm1_out_z = self.o.z
+        m.submodules.norm1_out_overflow = of
+        m.submodules.norm1_in_z = self.i.z
+        m.submodules.norm1_in_overflow = self.i.of
+
+        i = self.ispec()
+        m.submodules.norm1_insel_z = i.z
+        m.submodules.norm1_insel_overflow = i.of
+
+        espec = (len(i.z.e), True)
+        ediff_n126 = Signal(espec, reset_less=True)
+        msr = MultiShiftRMerge(mwid, espec)
+        m.submodules.multishift_r = msr
+
+        m.d.comb += i.eq(self.i)
+        # initialise out from in (overridden below)
+        m.d.comb += self.o.z.eq(i.z)
+        m.d.comb += of.eq(i.of)
+        # normalisation increase/decrease conditions
+        decrease = Signal(reset_less=True)
+        increase = Signal(reset_less=True)
+        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
+        m.d.comb += increase.eq(i.z.exp_lt_n126)
+        # decrease exponent
+        with m.If(~self.i.out_do_z):
+            with m.If(decrease):
+                # *sigh* not entirely obvious: count leading zeros (clz)
+                # with a PriorityEncoder: to find from the MSB
+                # we reverse the order of the bits.
+                temp_m = Signal(mwid, reset_less=True)
+                temp_s = Signal(mwid+1, reset_less=True)
+                clz = Signal((len(i.z.e), True), reset_less=True)
+                # make sure that the amount to decrease by does NOT
+                # go below the minimum non-INF/NaN exponent
+                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
+                             i.z.exp_sub_n126)
+                m.d.comb += [
+                    # cat round and guard bits back into the mantissa
+                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
+                    pe.i.eq(temp_m[::-1]), # inverted
+                    clz.eq(limclz), # count zeros from MSB down
+                    temp_s.eq(temp_m << clz), # shift mantissa UP
+                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
+                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
+                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
+                    # overflow in bits 0..1: got shifted too (leave sticky)
+                    of.guard.eq(temp_s[1]),       # guard
+                    of.round_bit.eq(temp_s[0]),   # round
+                ]
+            # increase exponent
+            with m.Elif(increase):
+                temp_m = Signal(mwid+1, reset_less=True)
+                m.d.comb += [
+                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
+                                  i.z.m)),
+                    ediff_n126.eq(i.z.N126 - i.z.e),
+                    # connect multi-shifter to inp/out mantissa (and ediff)
+                    msr.inp.eq(temp_m),
+                    msr.diff.eq(ediff_n126),
+                    self.o.z.m.eq(msr.m[3:]),
+                    # NOTE(review): the of.* lines below read temp_s, which
+                    # is only driven in the *decrease* branch above - they
+                    # look like they should read msr.m[3]/[2]/[1]/[0].
+                    # Confirm before relying on rounding in this branch.
+                    of.m0.eq(temp_s[3]),   # copy of mantissa[0]
+                    # overflow in bits 0..1: got shifted too (leave sticky)
+                    of.guard.eq(temp_s[2]),     # guard
+                    of.round_bit.eq(temp_s[1]), # round
+                    of.sticky.eq(temp_s[0]),    # sticky
+                    self.o.z.e.eq(i.z.e + ediff_n126),
+                ]
+
+        m.d.comb += self.o.mid.eq(self.i.mid)
+        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
+        m.d.comb += self.o.oz.eq(self.i.oz)
+
+        return m
+
+
+class FPNorm1ModMulti:
+    """ multi-cycle post-normalisation: shifts the mantissa by one
+        position per cycle (looping via temp_z/temp_of under control
+        of in_select) until no further normalisation is needed
+        (out_norm deasserted).
+    """
+
+    def __init__(self, width, single_cycle=True):
+        self.width = width
+        self.in_select = Signal(reset_less=True)
+        self.in_z = FPNumBase(width, False)
+        self.in_of = Overflow()
+        self.temp_z = FPNumBase(width, False)
+        self.temp_of = Overflow()
+        self.out_z = FPNumBase(width, False)
+        self.out_of = Overflow()
+        # fix: out_norm is driven in elaborate() (loop-end indicator)
+        # but was never created here, which would raise AttributeError
+        self.out_norm = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        m.submodules.norm1_out_z = self.out_z
+        m.submodules.norm1_out_overflow = self.out_of
+        m.submodules.norm1_temp_z = self.temp_z
+        m.submodules.norm1_temp_of = self.temp_of
+        m.submodules.norm1_in_z = self.in_z
+        m.submodules.norm1_in_overflow = self.in_of
+
+        in_z = FPNumBase(self.width, False)
+        in_of = Overflow()
+        m.submodules.norm1_insel_z = in_z
+        m.submodules.norm1_insel_overflow = in_of
+
+        # select which of temp or in z/of to use
+        with m.If(self.in_select):
+            m.d.comb += in_z.eq(self.in_z)
+            m.d.comb += in_of.eq(self.in_of)
+        with m.Else():
+            m.d.comb += in_z.eq(self.temp_z)
+            m.d.comb += in_of.eq(self.temp_of)
+        # initialise out from in (overridden below)
+        m.d.comb += self.out_z.eq(in_z)
+        m.d.comb += self.out_of.eq(in_of)
+        # normalisation increase/decrease conditions
+        decrease = Signal(reset_less=True)
+        increase = Signal(reset_less=True)
+        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
+        m.d.comb += increase.eq(in_z.exp_lt_n126)
+        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
+        # decrease exponent
+        with m.If(decrease):
+            m.d.comb += [
+                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
+                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
+                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
+                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
+                self.out_of.round_bit.eq(0),  # reset round bit
+                self.out_of.m0.eq(in_of.guard),
+            ]
+        # increase exponent
+        with m.Elif(increase):
+            m.d.comb += [
+                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
+                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
+                self.out_of.guard.eq(in_z.m[0]),
+                self.out_of.m0.eq(in_z.m[1]),
+                self.out_of.round_bit.eq(in_of.guard),
+                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
+            ]
+
+        return m
+
+
+class FPNorm1Single(FPState):
+    """ FSM state: single-cycle post-normalisation, then "round".
+    """
+
+    def __init__(self, width, id_wid, single_cycle=True):
+        FPState.__init__(self, "normalise_1")
+        # fix: FPNorm1ModSingle.__init__ requires both width and id_wid
+        self.mod = FPNorm1ModSingle(width, id_wid)
+        self.o = self.ospec()
+        self.out_z = FPNumBase(width, False)
+        self.out_roundz = Signal(reset_less=True)
+
+    def ispec(self):
+        return self.mod.ispec()
+
+    def ospec(self):
+        return self.mod.ospec()
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        self.mod.setup(m, i)
+
+    def action(self, m):
+        m.next = "round"
+
+
+class FPNorm1Multi(FPState):
+    """ FSM state: multi-cycle post-normalisation loop.
+
+        Keeps cycling (latching temp_z/temp_of) while the module
+        reports out_norm; when normalisation completes, latches the
+        round decision and moves to "round".
+
+        NOTE(review): FPNorm1ModMulti (above) defines no setup()
+        method with this signature - confirm where setup comes from.
+    """
+
+    def __init__(self, width, id_wid):
+        FPState.__init__(self, "normalise_1")
+        self.mod = FPNorm1ModMulti(width)
+        self.stb = Signal(reset_less=True)
+        self.ack = Signal(reset=0, reset_less=True)
+        self.out_norm = Signal(reset_less=True)
+        self.in_accept = Signal(reset_less=True)
+        self.temp_z = FPNumBase(width)
+        self.temp_of = Overflow()
+        self.out_z = FPNumBase(width)
+        self.out_roundz = Signal(reset_less=True)
+
+    def setup(self, m, in_z, in_of, norm_stb):
+        """ links module to inputs and outputs
+        """
+        self.mod.setup(m, in_z, in_of, norm_stb,
+                       self.in_accept, self.temp_z, self.temp_of,
+                       self.out_z, self.out_norm)
+
+        m.d.comb += self.stb.eq(norm_stb)
+        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
+
+    def action(self, m):
+        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
+        # feed the loop registers back for the next iteration
+        m.d.sync += self.temp_of.eq(self.mod.out_of)
+        m.d.sync += self.temp_z.eq(self.out_z)
+        with m.If(self.out_norm):
+            with m.If(self.in_accept):
+                m.d.sync += [
+                    self.ack.eq(1),
+                ]
+            with m.Else():
+                m.d.sync += self.ack.eq(0)
+        with m.Else():
+            # normalisation not required (or done).
+            m.next = "round"
+            m.d.sync += self.ack.eq(1)
+            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Cat
+from nmigen.lib.coding import PriorityEncoder
+from nmigen.cli import main, verilog
+from math import log
+
+from fpbase import Overflow, FPNumBase
+from fpbase import MultiShiftRMerge
+
+from fpbase import FPState
+
+
+class FPNormaliseModSingle:
+    """ combinatorial single-cycle mantissa normalisation
+        (decrease-exponent direction only).
+
+        NOTE(review): this block appears unfinished - it declares no
+        Overflow ports, and MultiShiftRMerge/ediff_n126 are created
+        but never used.  The fixes below only remove references to
+        attributes that were never defined (self.i, self.in_of,
+        self.out_of), which would have raised AttributeError.
+    """
+
+    def __init__(self, width):
+        self.width = width
+        self.in_z = self.ispec()
+        self.out_z = self.ospec()
+
+    def ispec(self):
+        return FPNumBase(self.width, False)
+
+    def ospec(self):
+        return FPNumBase(self.width, False)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        m.submodules.normalise = self
+        # fix: the input port is self.in_z (self.i never existed)
+        m.d.comb += self.in_z.eq(i)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        mwid = self.out_z.m_width+2
+        pe = PriorityEncoder(mwid)
+        m.submodules.norm_pe = pe
+
+        m.submodules.norm1_out_z = self.out_z
+        m.submodules.norm1_in_z = self.in_z
+
+        in_z = FPNumBase(self.width, False)
+        in_of = Overflow()
+        m.submodules.norm1_insel_z = in_z
+        m.submodules.norm1_insel_overflow = in_of
+
+        espec = (len(in_z.e), True)
+        ediff_n126 = Signal(espec, reset_less=True)
+        msr = MultiShiftRMerge(mwid, espec)
+        m.submodules.multishift_r = msr
+
+        m.d.comb += in_z.eq(self.in_z)
+        # fix: dropped in_of.eq(self.in_of) and self.out_of.eq(in_of):
+        # neither attribute exists; in_of keeps its reset default (0)
+        # initialise out from in (overridden below)
+        m.d.comb += self.out_z.eq(in_z)
+        # normalisation decrease condition
+        decrease = Signal(reset_less=True)
+        m.d.comb += decrease.eq(in_z.m_msbzero)
+        # decrease exponent
+        with m.If(decrease):
+            # *sigh* not entirely obvious: count leading zeros (clz)
+            # with a PriorityEncoder: to find from the MSB
+            # we reverse the order of the bits.
+            temp_m = Signal(mwid, reset_less=True)
+            temp_s = Signal(mwid+1, reset_less=True)
+            clz = Signal((len(in_z.e), True), reset_less=True)
+            m.d.comb += [
+                # cat round and guard bits back into the mantissa
+                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
+                pe.i.eq(temp_m[::-1]), # inverted
+                clz.eq(pe.o), # count zeros from MSB down
+                temp_s.eq(temp_m << clz), # shift mantissa UP
+                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
+                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
+            ]
+
+        return m
+
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Signal
+from nmigen.cli import main, verilog
+from fpbase import FPState
+
+
+class FPPutZ(FPState):
+    """ FSM state: presents the finished result on the output port,
+        holding valid high until the receiver acknowledges, then
+        returns to to_state (default "get_ops").
+    """
+
+    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
+        FPState.__init__(self, state)
+        if to_state is None:
+            to_state = "get_ops"
+        self.to_state = to_state
+        self.in_z = in_z
+        self.out_z = out_z
+        self.in_mid = in_mid
+        self.out_mid = out_mid
+
+    def action(self, m):
+        if self.in_mid is not None:
+            m.d.sync += self.out_mid.eq(self.in_mid)
+        m.d.sync += [
+          self.out_z.z.v.eq(self.in_z)
+        ]
+        # handshake: valid (ours) seen with ready (receiver's)
+        with m.If(self.out_z.z.valid_o & self.out_z.z.ready_i_test):
+            m.d.sync += self.out_z.z.valid_o.eq(0)
+            m.next = self.to_state
+        with m.Else():
+            m.d.sync += self.out_z.z.valid_o.eq(1)
+
+
+class FPPutZIdx(FPState):
+    """ FSM state: like FPPutZ but selects the output port from an
+        array (out_zs) indexed by the muxid (in_mid).
+    """
+
+    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
+        FPState.__init__(self, state)
+        if to_state is None:
+            to_state = "get_ops"
+        self.to_state = to_state
+        self.in_z = in_z
+        self.out_zs = out_zs
+        self.in_mid = in_mid
+
+    def action(self, m):
+        outz_stb = Signal(reset_less=True)
+        outz_ack = Signal(reset_less=True)
+        # handshake signals of the mid-selected output port
+        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].valid_o),
+                     outz_ack.eq(self.out_zs[self.in_mid].ready_i_test),
+                    ]
+        m.d.sync += [
+          self.out_zs[self.in_mid].v.eq(self.in_z.v)
+        ]
+        with m.If(outz_stb & outz_ack):
+            m.d.sync += self.out_zs[self.in_mid].valid_o.eq(0)
+            m.next = self.to_state
+        with m.Else():
+            m.d.sync += self.out_zs[self.in_mid].valid_o.eq(1)
+
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumBase
+from fpbase import FPState
+from fpcommon.postnormalise import FPNorm1Data
+
+
+class FPRoundData:
+    """ output record of the rounding stage: rounded result (z),
+        special-case bypass (out_do_z/oz) and muxid (mid).
+    """
+
+    def __init__(self, width, id_wid):
+        self.z = FPNumBase(width, False)
+        self.out_do_z = Signal(reset_less=True)
+        self.oz = Signal(width, reset_less=True)
+        self.mid = Signal(id_wid, reset_less=True)
+
+    def eq(self, i):
+        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
+                self.mid.eq(i.mid)]
+
+
+class FPRoundMod(Elaboratable):
+    """ combinatorial rounding: when roundz is set, increment the
+        mantissa; if the mantissa was all-1s, bump the exponent too
+        (the fixed-width mantissa increment wraps to zero).
+    """
+
+    def __init__(self, width, id_wid):
+        self.width = width
+        self.id_wid = id_wid
+        self.i = self.ispec()
+        self.out_z = self.ospec()
+
+    def ispec(self):
+        return FPNorm1Data(self.width, self.id_wid)
+
+    def ospec(self):
+        return FPRoundData(self.width, self.id_wid)
+
+    def process(self, i):
+        return self.out_z
+
+    def setup(self, m, i):
+        m.submodules.roundz = self
+        m.d.comb += self.i.eq(i)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
+        with m.If(~self.i.out_do_z):
+            with m.If(self.i.roundz):
+                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
+                with m.If(self.i.z.m == self.i.z.m1s): # all 1s
+                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
+
+        return m
+
+
+class FPRound(FPState):
+    """ FSM state: rounding step.  Registers FPRoundMod's output
+        and transitions to "corrections".
+    """
+
+    def __init__(self, width, id_wid):
+        FPState.__init__(self, "round")
+        # fix: FPRoundMod.__init__ requires both width and id_wid
+        self.mod = FPRoundMod(width, id_wid)
+        self.out_z = self.ospec()
+
+    def ispec(self):
+        return self.mod.ispec()
+
+    def ospec(self):
+        return self.mod.ospec()
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        self.mod.setup(m, i)
+
+        self.idsync(m)
+        m.d.sync += self.out_z.eq(self.mod.out_z)
+        # fix: FPRoundMod's output record is out_z (it has no .o
+        # attribute); out_z.eq() above already copies mid as well
+        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)
+
+    def action(self, m):
+        m.next = "corrections"
--- /dev/null
+from sfpy import Float32
+
+
+# XXX DO NOT USE, fails on num=65536. wark-wark...
+def sqrtsimple(num):
+    """ integer square root, digit-by-digit (binary) method.
+
+        NOTE(review): the canonical form of this algorithm shrinks
+        'bit' back down (while bit > num: bit >>= 2) after the growth
+        loop so it starts at the highest power of four <= num; that
+        step is missing here - presumably related to the failure the
+        XXX warning above describes.  Confirm before reuse.
+    """
+    res = 0
+    bit = 1
+
+    # grow 'bit' to the first power of four >= num
+    while (bit < num):
+        bit <<= 2
+
+    while (bit != 0):
+        if (num >= res + bit):
+            num -= res + bit
+            res = (res >> 1) + bit
+        else:
+            res >>= 1
+        bit >>= 2
+
+    return res
+
+
+def sqrt(num):
+    """ non-restoring integer square root.
+
+        Processes the input two bits per iteration (65 iterations,
+        so inputs up to 130 bits).  Returns (root, remainder) with
+        num == root*root + remainder after the final adjustment.
+    """
+    D = num # D is input (from num)
+    Q = 0 # quotient
+    R = 0 # remainder
+    for i in range(64, -1, -1): # negative ranges are weird...
+
+        # bring down the next two bits of D
+        R = (R<<2)|((D>>(i+i))&3)
+
+        if R >= 0:
+            R -= ((Q<<2)|1) # -Q01
+        else:
+            R += ((Q<<2)|3) # +Q11
+
+        Q <<= 1
+        if R >= 0:
+            Q |= 1 # new Q
+
+    # remainder adjustment: make R non-negative
+    if R < 0:
+        R = R + ((Q<<1)|1)
+
+    return Q, R
+
+
+# grabbed these from unit_test_single (convenience, this is just experimenting)
+
+def get_mantissa(x):
+    # low 23 bits of an FP32 word
+    return 0x7fffff & x
+
+def get_exponent(x):
+    # unbiased exponent (bias 127) from bits 30..23
+    return ((x & 0x7f800000) >> 23) - 127
+
+def set_exponent(x, e):
+    # replace the exponent field of x with (e + 127)
+    return (x & ~0x7f800000) | ((e+127) << 23)
+
+def get_sign(x):
+    # sign bit (bit 31)
+    return ((x & 0x80000000) >> 31)
+
+# convert s/e/m to FP32 (fix: this header and decode_fp32's were swapped)
+def create_fp32(s, e, m):
+    """ receive sign, exponent, mantissa, return FP32 """
+    # fix: the exponent was silently dropped - set_exponent requires
+    # (x, e), but only one argument was being passed
+    return set_exponent((s << 31) | get_mantissa(m), e)
+
+# convert FP32 to s/e/m (header fixed: it was swapped with create_fp32's)
+def decode_fp32(x):
+    """ receive FP32, return sign, exponent, mantissa """
+    return get_sign(x), get_exponent(x), get_mantissa(x)
+
+
+# main function, takes mantissa and exponent as separate arguments
+# returns a tuple, sqrt'd mantissa, sqrt'd exponent
+
+def main(mantissa, exponent):
+    """ square-root of a mantissa/exponent pair.
+
+        Makes the exponent even (so it can simply be halved), then
+        square-roots the mantissa.  Returns (root, remainder,
+        halved exponent).
+    """
+    if exponent & 1 != 0:
+        # shift mantissa up, subtract 1 from exp to compensate
+        mantissa <<= 1
+        exponent -= 1
+    m, r = sqrt(mantissa)
+    return m, r, exponent >> 1
+
+
+#normalization function
+def normalise(s, m, e, lowbits):
+ if (lowbits >= 2):
+ m += 1
+ if get_mantissa(m) == ((1<<24)-1):
+ e += 1
+ return s, m, e
+
+
+def fsqrt_test(x):
+    """ compares this file's integer-sqrt pipeline against sfpy
+        Float32.sqrt() for one value, printing both decompositions.
+    """
+
+    xbits = x.bits
+    print ("x", x, type(x))
+    sq_test = x.sqrt()
+    print ("sqrt", sq_test)
+
+    print (xbits, type(xbits))
+    s, e, m = decode_fp32(xbits)
+    print("x decode", s, e, m, hex(m))
+
+    m |= 1<<23 # set top bit (the missing "1" from mantissa)
+    m <<= 27
+
+    sm, sr, se = main(m, e)
+    lowbits = sm & 0x3
+    sm >>= 2
+    sm = get_mantissa(sm)
+    #sm += 2
+
+    s, sm, se = normalise(s, sm, se, lowbits)
+
+    print("our sqrt", s, se, sm, hex(sm), bin(sm), "lowbits", lowbits,
+          "rem", hex(sr))
+    if lowbits >= 2:
+        print ("probably needs rounding (+1 on mantissa)")
+
+    sq_xbits = sq_test.bits
+    s, e, m = decode_fp32(sq_xbits)
+    print ("sf32 sqrt", s, e, m, hex(m), bin(m))
+    print ()
+
+if __name__ == '__main__':
+
+    # quick self-test of the two integer sqrt functions against
+    # int(Q**0.5) for Q in [1, 1e4)
+    for Q in range(1, int(1e4)):
+        print(Q, sqrt(Q), sqrtsimple(Q), int(Q**0.5))
+        assert int(Q**0.5) == sqrtsimple(Q), "Q sqrtsimpl fail %d" % Q
+        assert int(Q**0.5) == sqrt(Q)[0], "Q sqrt fail %d" % Q
+
+    # quick mantissa/exponent demo
+    for e in range(26):
+        for m in range(26):
+            ms, mr, es = main(m, e)
+            print("m:%d e:%d sqrt: m:%d-%d e:%d" % (m, e, ms, mr, es))
+
+    # spot-check the FP32 sqrt pipeline against sfpy
+    x = Float32(1234.123456789)
+    fsqrt_test(x)
+    x = Float32(32.1)
+    fsqrt_test(x)
+    x = Float32(16.0)
+    fsqrt_test(x)
+    x = Float32(8.0)
+    fsqrt_test(x)
+    x = Float32(8.5)
+    fsqrt_test(x)
+    x = Float32(3.14159265358979323)
+    fsqrt_test(x)
+    x = Float32(12.99392923123123)
+    fsqrt_test(x)
+    x = Float32(0.123456)
+    fsqrt_test(x)
+
+
+
+"""
+
+Notes:
+https://pdfs.semanticscholar.org/5060/4e9aff0e37089c4ab9a376c3f35761ffe28b.pdf
+
+//This is the main code of integer sqrt function found here:http://verilogcodes.blogspot.com/2017/11/a-verilog-function-for-finding-square-root.html
+//
+
+module testbench;
+
+reg [15:0] sqr;
+
+//Verilog function to find square root of a 32 bit number.
+//The output is 16 bit.
+function [15:0] sqrt;
+ input [31:0] num; //declare input
+ //intermediate signals.
+ reg [31:0] a;
+ reg [15:0] q;
+ reg [17:0] left,right,r;
+ integer i;
+begin
+ //initialize all the variables.
+ a = num;
+ q = 0;
+ i = 0;
+ left = 0; //input to adder/sub
+ right = 0; //input to adder/sub
+ r = 0; //remainder
+ //run the calculations for 16 iterations.
+ for(i=0;i<16;i=i+1) begin
+ right = {q,r[17],1'b1};
+ left = {r[15:0],a[31:30]};
+ a = {a[29:0],2'b00}; //left shift by 2 bits.
+ if (r[17] == 1) //add if r is negative
+ r = left + right;
+ else //subtract if r is positive
+ r = left - right;
+ q = {q[14:0],!r[17]};
+ end
+ sqrt = q; //final assignment of output.
+end
+endfunction //end of Function
+
+
+c version (from paper linked from URL)
+
+unsigned squart(D, r) /*Non-Restoring sqrt*/
+ unsigned D; /*D:32-bit unsigned integer to be square rooted */
+ int *r;
+{
+ unsigned Q = 0; /*Q:16-bit unsigned integer (root)*/
+ int R = 0; /*R:17-bit integer (remainder)*/
+ int i;
+ for (i = 15;i>=0;i--) /*for each root bit*/
+ {
+ if (R>=0)
+ { /*new remainder:*/
+ R = R<<2)|((D>>(i+i))&3);
+ R = R-((Q<<2)|1); /*-Q01*/
+ }
+ else
+ { /*new remainder:*/
+ R = R<<2)|((D>>(i+i))&3);
+ R = R+((Q<<2)|3); /*+Q11*/
+ }
+ if (R>=0) Q = Q<<1)|1; /*new Q:*/
+ else Q = Q<<1)|0; /*new Q:*/
+ }
+
+ /*remainder adjusting*/
+ if (R<0) R = R+((Q<<1)|1);
+ *r = R; /*return remainder*/
+ return(Q); /*return root*/
+}
+
+From wikipedia page:
+
+short isqrt(short num) {
+ short res = 0;
+ short bit = 1 << 14; // The second-to-top bit is set: 1 << 30 for 32 bits
+
+ // "bit" starts at the highest power of four <= the argument.
+ while (bit > num)
+ bit >>= 2;
+
+ while (bit != 0) {
+ if (num >= res + bit) {
+ num -= res + bit;
+ res = (res >> 1) + bit;
+ }
+ else
+ res >>= 1;
+ bit >>= 2;
+ }
+ return res;
+}
+
+"""
--- /dev/null
+from nmigen import Signal, Cat, Const, Mux, Module, Array
+from nmigen.cli import main, verilog
+
+from nmigen_add_experiment import FPADD
+from rstation_row import ReservationStationRow
+
+from math import log
+
+class FunctionUnit:
+
+    def __init__(self, width, num_units):
+        """ Function Unit
+
+            * width: bit-width of IEEE754. supported: 16, 32, 64
+            * num_units: number of Reservation Stations
+        """
+        self.width = width
+
+        fus = []
+        # mid (muxid) width: log2 of... NOTE(review): this uses
+        # log2(width), not log2(num_units) - confirm which is intended
+        bsz = int(log(width) / log(2))
+        for i in range(num_units):
+            mid = Const(i, bsz)
+            rs = ReservationStationRow(width, mid)
+            rs.name = "RS%d" % i
+            fus.append(rs)
+        self.fus = Array(fus)
+
+    def elaborate(self, platform=None):
+        """ creates the HDL code-fragment for ReservationStationRow
+
+            NOTE(review): currently a stub - the rows in self.fus are
+            never attached as submodules, so an empty Module is returned.
+        """
+        m = Module()
+
+        return m
+
+
+if __name__ == "__main__":
+    # fix: both statements were missing their closing brackets (a
+    # syntax error), and 'alu' was never defined - the instantiated
+    # 'rs' is what should be handed to main()
+    rs = ReservationStationRow(width=32, id_wid=Const(1, 4))
+    main(rs, ports=[rs.in_a, rs.in_b, rs.out_z])
+
+    # works... but don't use, just do "python fname.py convert -t v"
+    #print (verilog.convert(alu, ports=[
+    #        ports=alu.in_a.ports() + \
+    #              alu.in_b.ports() + \
+    #              alu.out_z.ports())
--- /dev/null
+from nmigen import Module, Signal, Cat, Array, Const
+from nmigen.lib.coding import PriorityEncoder
+from math import log
+
+from fpbase import Trigger
+
+
+class FPGetSyncOpsMod:
+    """ synchronous capture of num_ops operands.
+
+        'ready' asserts only when ALL per-operand strobes are set
+        (stb is compared against all-ones); the operands are then
+        passed through combinatorially while ack is raised.
+    """
+    def __init__(self, width, num_ops=2):
+        self.width = width
+        self.num_ops = num_ops
+        inops = []
+        outops = []
+        for i in range(num_ops):
+            inops.append(Signal(width, reset_less=True))
+            outops.append(Signal(width, reset_less=True))
+        self.in_op = inops
+        self.out_op = outops
+        self.stb = Signal(num_ops)
+        self.ack = Signal()
+        self.ready = Signal(reset_less=True)
+        self.out_decode = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        # all strobes present: Const(-1, n) is the all-ones pattern
+        m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
+        m.d.comb += self.out_decode.eq(self.ack & self.ready)
+        with m.If(self.out_decode):
+            for i in range(self.num_ops):
+                m.d.comb += [
+                    self.out_op[i].eq(self.in_op[i]),
+                ]
+        return m
+
+    def ports(self):
+        return self.in_op + self.out_op + [self.stb, self.ack]
+
+
+class FPOps(Trigger):
+    """ Trigger-based bundle of num_ops operand values (v), with the
+        stb/ack handshake inherited from Trigger.
+    """
+    def __init__(self, width, num_ops):
+        Trigger.__init__(self)
+        self.width = width
+        self.num_ops = num_ops
+
+        res = []
+        for i in range(num_ops):
+            res.append(Signal(width))
+        self.v = Array(res)
+
+    def ports(self):
+        res = []
+        for i in range(self.num_ops):
+            res.append(self.v[i])
+        res.append(self.ack)
+        res.append(self.stb)
+        return res
+
+
+class InputGroup:
+    """ fan-in of num_rows Reservation-Station operand inputs onto a
+        single FPOps output, arbitrated with a PriorityEncoder; the
+        winning row's index is recorded in mid for result routing.
+    """
+    def __init__(self, width, num_ops=2, num_rows=4):
+        self.width = width
+        self.num_ops = num_ops
+        self.num_rows = num_rows
+        # mid width: log2(num_rows)
+        self.mmax = int(log(self.num_rows) / log(2))
+        self.rs = []
+        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
+        for i in range(num_rows):
+            self.rs.append(FPGetSyncOpsMod(width, num_ops))
+        self.rs = Array(self.rs)
+
+        self.out_op = FPOps(width, num_ops)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        pe = PriorityEncoder(self.num_rows)
+        m.submodules.selector = pe
+        m.submodules.out_op = self.out_op
+        m.submodules += self.rs
+
+        # connect priority encoder
+        in_ready = []
+        for i in range(self.num_rows):
+            in_ready.append(self.rs[i].ready)
+        m.d.comb += pe.i.eq(Cat(*in_ready))
+
+        active = Signal(reset_less=True)
+        out_en = Signal(reset_less=True)
+        m.d.comb += active.eq(~pe.n) # encoder active
+        m.d.comb += out_en.eq(active & self.out_op.trigger)
+
+        # encoder active: ack relevant input, record MID, pass output
+        with m.If(out_en):
+            rs = self.rs[pe.o]
+            m.d.sync += self.mid.eq(pe.o)
+            m.d.sync += rs.ack.eq(0)
+            m.d.sync += self.out_op.stb.eq(0)
+            for j in range(self.num_ops):
+                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
+        with m.Else():
+            m.d.sync += self.out_op.stb.eq(1)
+            # acks all default to zero
+            for i in range(self.num_rows):
+                m.d.sync += self.rs[i].ack.eq(1)
+
+        return m
+
+    def ports(self):
+        res = []
+        for i in range(self.num_rows):
+            inop = self.rs[i]
+            res += inop.in_op + [inop.stb]
+        return self.out_op.ports() + res + [self.mid]
+
+
--- /dev/null
+""" IO Control API
+
+ Associated development bugs:
+ * http://bugs.libre-riscv.org/show_bug.cgi?id=64
+ * http://bugs.libre-riscv.org/show_bug.cgi?id=57
+
+ Stage API:
+ ---------
+
+ stage requires compliance with a strict API that may be
+ implemented in several means, including as a static class.
+
+ Stages do not HOLD data, and they definitely do not contain
+ signalling (ready/valid). They do however specify the FORMAT
+ of the incoming and outgoing data, and they provide a means to
+ PROCESS that data (from incoming format to outgoing format).
+
+    Stage Blocks really must be combinatorial blocks.  It would be ok
+    to have input come in from sync'd sources (clock-driven), however by
+    doing so they would no longer be deterministic, and chaining such
+    blocks with such side-effects together could result in unexpected,
+    unpredictable, unreproducible behaviour.
+    So this is generally to be avoided, unless you know what you are doing.
+
+ the methods of a stage instance must be as follows:
+
+ * ispec() - Input data format specification. Takes a bit of explaining.
+ The requirements are: something that eventually derives from
+ nmigen Value must be returned *OR* an iterator or iterable
+ or sequence (list, tuple etc.) or generator must *yield*
+ thing(s) that (eventually) derive from the nmigen Value class.
+
+ Complex to state, very simple in practice:
+ see test_buf_pipe.py for over 25 worked examples.
+
+ * ospec() - Output data format specification.
+ format requirements identical to ispec.
+
+ * process(m, i) - Optional function for processing ispec-formatted data.
+ returns a combinatorial block of a result that
+ may be assigned to the output, by way of the "nmoperator.eq"
+ function. Note that what is returned here can be
+ extremely flexible. Even a dictionary can be returned
+ as long as it has fields that match precisely with the
+ Record into which its values is intended to be assigned.
+ Again: see example unit tests for details.
+
+ * setup(m, i) - Optional function for setting up submodules.
+ may be used for more complex stages, to link
+ the input (i) to submodules. must take responsibility
+ for adding those submodules to the module (m).
+ the submodules must be combinatorial blocks and
+ must have their inputs and output linked combinatorially.
+
+ Both StageCls (for use with non-static classes) and Stage (for use
+ by static classes) are abstract classes from which, for convenience
+ and as a courtesy to other developers, anything conforming to the
+ Stage API may *choose* to derive. See Liskov Substitution Principle:
+ https://en.wikipedia.org/wiki/Liskov_substitution_principle
+
+ StageChain:
+ ----------
+
+ A useful combinatorial wrapper around stages that chains them together
+ and then presents a Stage-API-conformant interface. By presenting
+ the same API as the stages it wraps, it can clearly be used recursively.
+
+ ControlBase:
+ -----------
+
+ The base class for pipelines. Contains previous and next ready/valid/data.
+ Also has an extremely useful "connect" function that can be used to
+ connect a chain of pipelines and present the exact same prev/next
+ ready/valid/data API.
+
+ Note: pipelines basically do not become pipelines as such until
+ handed to a derivative of ControlBase. ControlBase itself is *not*
+ strictly considered a pipeline class. Wishbone and AXI4 (master or
+ slave) could be derived from ControlBase, for example.
+"""
+
+from nmigen import Signal, Cat, Const, Module, Value, Elaboratable
+from nmigen.cli import verilog, rtlil
+from nmigen.hdl.rec import Record
+
+from collections.abc import Sequence, Iterable
+from collections import OrderedDict
+
+import nmoperator
+
+
class Object:
    """ a Record-like object whose non-private attributes are stored in an
        OrderedDict (self.fields), giving ordered iteration, flattening
        (ports) and bulk assignment (eq) over those attributes.
    """
    def __init__(self):
        self.fields = OrderedDict()

    def __setattr__(self, k, v):
        print ("kv", k, v)
        # private names, bootstrap names, and anything defined on the class
        # itself go through normal attribute assignment; everything else is
        # captured into self.fields (once self.fields exists)
        if (k.startswith('_') or k in ["fields", "name", "src_loc"] or
            k in dir(Object) or "fields" not in self.__dict__):
            return object.__setattr__(self, k, v)
        self.fields[k] = v

    def __getattr__(self, k):
        # bug fix: object has no "__getattr__" attribute, so the original
        # "object.__getattr__(self, k)" raised AttributeError even for names
        # present in __dict__.  use __getattribute__ for the normal path.
        if k in self.__dict__:
            return object.__getattribute__(self, k)
        try:
            return self.fields[k]
        except KeyError as e:
            raise AttributeError(e)

    def __iter__(self):
        """ flatten the stored fields, in insertion order """
        for x in self.fields.values(): # OrderedDict so order is preserved
            if isinstance(x, Iterable):
                yield from x
            else:
                yield x

    def eq(self, inp):
        """ returns a list of assignments copying each field from the
            same-named attribute of *inp*
        """
        res = []
        for (k, o) in self.fields.items():
            i = getattr(inp, k)
            print ("eq", o, i)
            rres = o.eq(i)
            if isinstance(rres, Sequence):
                res += rres
            else:
                res.append(rres)
        print (res)
        return res

    def ports(self): # being called "keys" would be much better
        return list(self)
+
+
class RecordObject(Record):
    """ a Record whose layout grows dynamically: assigning a Record, Value
        or nmoperator-shaped object to an attribute adds a matching field
        to the layout as well as storing the value in self.fields.
    """
    def __init__(self, layout=None, name=None):
        # bug fix: name was hard-coded to None, silently discarding the
        # caller-supplied name
        Record.__init__(self, layout=layout or [], name=name)

    def __setattr__(self, k, v):
        #print (dir(Record))
        # private / bootstrap / class-level names use normal assignment
        if (k.startswith('_') or k in ["fields", "name", "src_loc"] or
            k in dir(Record) or "fields" not in self.__dict__):
            return object.__setattr__(self, k, v)
        self.fields[k] = v
        #print ("RecordObject setattr", k, v)
        # extend the layout to describe the newly-stored field
        if isinstance(v, Record):
            newlayout = {k: (k, v.layout)}
        elif isinstance(v, Value):
            newlayout = {k: (k, v.shape())}
        else:
            newlayout = {k: (k, nmoperator.shape(v))}
        self.layout.fields.update(newlayout)

    def __iter__(self):
        """ flatten the stored fields, in insertion order """
        for x in self.fields.values(): # remember: fields is an OrderedDict
            if isinstance(x, Iterable):
                yield from x # a bit like flatten (nmigen.tools)
            else:
                yield x

    def ports(self): # would be better being called "keys"
        return list(self)
+
+
class PrevControl(Elaboratable):
    """ contains signals that come *from* the previous stage (both in and out)
        * valid_i: previous stage indicating all incoming data is valid.
                   may be a multi-bit signal, where all bits are required
                   to be asserted to indicate "valid".
        * ready_o: output to next stage indicating readiness to accept data
        * data_i : an input - MUST be added by the USER of this class
    """

    def __init__(self, i_width=1, stage_ctl=False):
        # stage_ctl=True: the stage dynamically gates its own readiness
        # through s_ready_o, instead of readiness being purely
        # handshake-driven
        self.stage_ctl = stage_ctl
        self.valid_i = Signal(i_width, name="p_valid_i") # prev >>in self
        self._ready_o = Signal(name="p_ready_o") # prev <<out self
        self.data_i = None # XXX MUST BE ADDED BY USER
        if stage_ctl:
            self.s_ready_o = Signal(name="p_s_o_rdy") # prev <<out self
        # "transaction takes place now" strobe, driven in elaborate()
        self.trigger = Signal(reset_less=True)

    @property
    def ready_o(self):
        """ public-facing API: indicates (externally) that stage is ready
        """
        if self.stage_ctl:
            return self.s_ready_o # set dynamically by stage
        return self._ready_o # return this when not under dynamic control

    def _connect_in(self, prev, direct=False, fn=None, do_data=True):
        """ internal helper function to connect stage to an input source.
            do not use to connect stage-to-stage!

            * direct : use prev.valid_i raw instead of prev.valid_i_test
            * fn     : optional transform applied to prev.data_i
            * do_data: when False, only the valid/ready wires are connected
        """
        valid_i = prev.valid_i if direct else prev.valid_i_test
        res = [self.valid_i.eq(valid_i),
               prev.ready_o.eq(self.ready_o)]
        if do_data is False:
            return res
        data_i = fn(prev.data_i) if fn is not None else prev.data_i
        return res + [nmoperator.eq(self.data_i, data_i)]

    @property
    def valid_i_test(self):
        """ reduces valid_i to a single-bit "all valid" condition """
        vlen = len(self.valid_i)
        if vlen > 1:
            # multi-bit case: valid only when valid_i is all 1s
            all1s = Const(-1, (len(self.valid_i), False))
            valid_i = (self.valid_i == all1s)
        else:
            # single-bit valid_i case
            valid_i = self.valid_i

        # when stage indicates not ready, incoming data
        # must "appear" to be not ready too
        if self.stage_ctl:
            valid_i = valid_i & self.s_ready_o

        return valid_i

    def elaborate(self, platform):
        m = Module()
        # trigger fires when data is (fully) valid AND the stage is ready
        m.d.comb += self.trigger.eq(self.valid_i_test & self.ready_o)
        return m

    def eq(self, i):
        """ returns assignments mirroring another PrevControl's signals """
        return [nmoperator.eq(self.data_i, i.data_i),
                self.ready_o.eq(i.ready_o),
                self.valid_i.eq(i.valid_i)]

    def __iter__(self):
        """ flattens the handshake signals plus data_i (whatever its form) """
        yield self.valid_i
        yield self.ready_o
        if hasattr(self.data_i, "ports"):
            yield from self.data_i.ports()
        elif isinstance(self.data_i, Sequence):
            yield from self.data_i
        else:
            yield self.data_i

    def ports(self):
        return list(self)
+
+
class NextControl(Elaboratable):
    """ contains the signals that go *to* the next stage (both in and out)
        * valid_o: output indicating to next stage that data is valid
        * ready_i: input from next stage indicating that it can accept data
        * data_o : an output - MUST be added by the USER of this class
    """
    def __init__(self, stage_ctl=False):
        self.stage_ctl = stage_ctl
        self.valid_o = Signal(name="n_valid_o") # self out>> next
        self.ready_i = Signal(name="n_ready_i") # self <<in next
        self.data_o = None # XXX MUST BE ADDED BY USER
        #if self.stage_ctl:
        # d_valid gates ready_i when the stage dynamically controls
        # data-validity (reset=1 so it is transparent by default)
        self.d_valid = Signal(reset=1) # INTERNAL (data valid)
        # "transaction takes place now" strobe, driven in elaborate()
        self.trigger = Signal(reset_less=True)

    @property
    def ready_i_test(self):
        """ ready_i, additionally gated by d_valid under stage control """
        if self.stage_ctl:
            return self.ready_i & self.d_valid
        return self.ready_i

    def connect_to_next(self, nxt, do_data=True):
        """ helper function to connect to the next stage data/valid/ready.
            data/valid is passed *TO* nxt, and ready comes *IN* from nxt.
            use this when connecting stage-to-stage
        """
        res = [nxt.valid_i.eq(self.valid_o),
               self.ready_i.eq(nxt.ready_o)]
        if do_data:
            res.append(nmoperator.eq(nxt.data_i, self.data_o))
        return res

    def _connect_out(self, nxt, direct=False, fn=None, do_data=True):
        """ internal helper function to connect stage to an output source.
            do not use to connect stage-to-stage!

            * direct : use nxt.ready_i raw instead of nxt.ready_i_test
            * fn     : optional transform applied to nxt.data_o
            * do_data: when False, only the valid/ready wires are connected
        """
        ready_i = nxt.ready_i if direct else nxt.ready_i_test
        res = [nxt.valid_o.eq(self.valid_o),
               self.ready_i.eq(ready_i)]
        if not do_data:
            return res
        data_o = fn(nxt.data_o) if fn is not None else nxt.data_o
        return res + [nmoperator.eq(data_o, self.data_o)]

    def elaborate(self, platform):
        m = Module()
        # trigger fires when the next stage can accept AND data is valid
        m.d.comb += self.trigger.eq(self.ready_i_test & self.valid_o)
        return m

    def __iter__(self):
        """ flattens the handshake signals plus data_o (whatever its form) """
        yield self.ready_i
        yield self.valid_o
        if hasattr(self.data_o, "ports"):
            yield from self.data_o.ports()
        elif isinstance(self.data_o, Sequence):
            yield from self.data_o
        else:
            yield self.data_o

    def ports(self):
        return list(self)
+
--- /dev/null
+""" Combinatorial Multi-input and Multi-output multiplexer blocks
+ conforming to Pipeline API
+
+ Multi-input is complex because if any one input is ready, the output
+ can be ready, and the decision comes from a separate module.
+
+ Multi-output is simple (pretty much identical to UnbufferedPipeline),
+ and the selection is just a mux. The only proviso (difference) being:
+ the outputs not being selected have to have their ready_o signals
+ DEASSERTED.
+"""
+
+from math import log
+from nmigen import Signal, Cat, Const, Mux, Module, Array, Elaboratable
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.coding import PriorityEncoder
+from nmigen.hdl.rec import Record, Layout
+from stageapi import _spec
+
+from collections.abc import Sequence
+
+from example_buf_pipe import eq, NextControl, PrevControl, ExampleStage
+
+
class MultiInControlBase(Elaboratable):
    """ Common functions for Pipeline API
    """
    def __init__(self, in_multi=None, p_len=1):
        """ Multi-input Control class. Conforms to same API as ControlBase...
            mostly. has additional indices to the *multiple* input stages

            * p: contains ready/valid to the previous stages PLURAL
            * n: contains ready/valid to the next stage

            User must also:
            * add data_i members to PrevControl and
            * add data_o member to NextControl
        """
        # set up input and output IO ACK (prev/next ready/valid)
        p = []
        for i in range(p_len):
            p.append(PrevControl(in_multi))
        self.p = Array(p)
        self.n = NextControl()

    def connect_to_next(self, nxt, p_idx=0):
        """ helper function to connect to the next stage data/valid/ready.
        """
        return self.n.connect_to_next(nxt.p[p_idx])

    def _connect_in(self, prev, idx=0, prev_idx=None):
        """ helper function to connect stage to an input source. do not
            use to connect stage-to-stage!
        """
        if prev_idx is None:
            return self.p[idx]._connect_in(prev.p)
        return self.p[idx]._connect_in(prev.p[prev_idx])

    def _connect_out(self, nxt, nxt_idx=None):
        """ helper function to connect stage to an output source. do not
            use to connect stage-to-stage!
        """
        # bug fix: nxt_idx was referenced without being a parameter
        # (NameError at call time) and both branches were identical.
        # mirror _connect_in's optional-index pattern instead.
        if nxt_idx is None:
            return self.n._connect_out(nxt.n)
        return self.n._connect_out(nxt.n[nxt_idx])

    def set_input(self, i, idx=0):
        """ helper function to set the input data
        """
        return eq(self.p[idx].data_i, i)

    def elaborate(self, platform):
        m = Module()
        # register each PrevControl (p0, p1, ...) and the single NextControl
        for i, p in enumerate(self.p):
            setattr(m.submodules, "p%d" % i, p)
        m.submodules.n = self.n
        return m

    def __iter__(self):
        for p in self.p:
            yield from p
        yield from self.n

    def ports(self):
        return list(self)
+
+
class MultiOutControlBase(Elaboratable):
    """ Common functions for Pipeline API
    """
    def __init__(self, n_len=1, in_multi=None):
        """ Multi-output Control class. Conforms to same API as ControlBase...
            mostly. has additional indices to the multiple *output* stages
            [MultiInControlBase has multiple *input* stages]

            * p: contains ready/valid to the previous stage
            * n: contains ready/valid to the next stages PLURAL

            User must also:
            * add data_i member to PrevControl and
            * add data_o members to NextControl
        """

        # set up input and output IO ACK (prev/next ready/valid)
        self.p = PrevControl(in_multi)
        n = []
        for i in range(n_len):
            n.append(NextControl())
        self.n = Array(n)

    def connect_to_next(self, nxt, n_idx=0):
        """ helper function to connect to the next stage data/valid/ready.
        """
        return self.n[n_idx].connect_to_next(nxt.p)

    def _connect_in(self, prev, idx=0):
        """ helper function to connect stage to an input source. do not
            use to connect stage-to-stage!
        """
        # bug fix: was self.n[idx]._connect_in(prev.p) -- NextControl has
        # no _connect_in method (AttributeError).  the input side of a
        # multi-out pipe is the single PrevControl, self.p.
        # NOTE(review): idx is retained for interface compatibility but is
        # unused -- there is only one input port on this class.
        return self.p._connect_in(prev.p)

    def _connect_out(self, nxt, idx=0, nxt_idx=None):
        """ helper function to connect stage to an output source. do not
            use to connect stage-to-stage!
        """
        if nxt_idx is None:
            return self.n[idx]._connect_out(nxt.n)
        return self.n[idx]._connect_out(nxt.n[nxt_idx])

    def elaborate(self, platform):
        m = Module()
        # register the single PrevControl and each NextControl (n0, n1, ...)
        m.submodules.p = self.p
        for i, n in enumerate(self.n):
            setattr(m.submodules, "n%d" % i, n)
        return m

    def set_input(self, i):
        """ helper function to set the input data
        """
        return eq(self.p.data_i, i)

    def __iter__(self):
        yield from self.p
        for n in self.n:
            yield from n

    def ports(self):
        return list(self)
+
+
class CombMultiOutPipeline(MultiOutControlBase):
    """ A multi-output Combinatorial block conforming to the Pipeline API

    Attributes:
    -----------
    p.data_i : stage input data (non-array). shaped according to ispec
    n.data_o : stage output data array. shaped according to ospec
    """

    def __init__(self, stage, n_len, n_mux):
        MultiOutControlBase.__init__(self, n_len=n_len)
        self.stage = stage
        # n_mux supplies m_id, the index of the output to route data to
        self.n_mux = n_mux

        # set up the input and output data
        self.p.data_i = _spec(stage.ispec, 'data_i') # input type
        for i in range(n_len):
            name = 'data_o_%d' % i
            self.n[i].data_o = _spec(stage.ospec, name) # output type

    def process(self, i):
        """ delegates to the stage's process function when it has one,
            otherwise passes the data through unmodified
        """
        if hasattr(self.stage, "process"):
            return self.stage.process(i)
        return i

    def elaborate(self, platform):
        m = MultiOutControlBase.elaborate(self, platform)

        if hasattr(self.n_mux, "elaborate"): # TODO: identify submodule?
            m.submodules += self.n_mux

        # need buffer register conforming to *input* spec
        r_data = _spec(self.stage.ispec, 'r_data') # input type
        if hasattr(self.stage, "setup"):
            self.stage.setup(m, r_data)

        # multiplexer id taken from n_mux
        mid = self.n_mux.m_id

        # temporaries
        p_valid_i = Signal(reset_less=True)
        pv = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)
        # pv: input transaction strobe (valid AND ready)
        m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o)

        # all outputs to next stages first initialised to zero (invalid)
        # the only output "active" is then selected by the muxid
        for i in range(len(self.n)):
            m.d.comb += self.n[i].valid_o.eq(0)
        data_valid = self.n[mid].valid_o
        # ready to accept when no data held, or the selected output accepts
        m.d.comb += self.p.ready_o.eq(~data_valid | self.n[mid].ready_i)
        m.d.comb += data_valid.eq(p_valid_i | \
                                    (~self.n[mid].ready_i & data_valid))
        # latch (combinatorially) the input into r_data on a transaction
        with m.If(pv):
            m.d.comb += eq(r_data, self.p.data_i)
        # processed result is routed only to the mux-selected output
        m.d.comb += eq(self.n[mid].data_o, self.process(r_data))

        return m
+
+
class CombMultiInPipeline(MultiInControlBase):
    """ A multi-input Combinatorial block conforming to the Pipeline API

    Attributes:
    -----------
    p[i].data_i : stage input data array. shaped according to ispec
    n.data_o    : stage output data, shaped according to ospec
    r_data      : array of (buffered) copies of prior (valid) inputs.
                  Each is HELD if the output is not ready.
    """

    def __init__(self, stage, p_len, p_mux):
        MultiInControlBase.__init__(self, p_len=p_len)
        self.stage = stage
        # p_mux supplies m_id (selected input index) and "active"
        self.p_mux = p_mux

        # set up the input and output data
        for i in range(p_len):
            name = 'data_i_%d' % i
            self.p[i].data_i = _spec(stage.ispec, name) # input type
        self.n.data_o = _spec(stage.ospec, 'data_o')

    def process(self, i):
        """ delegates to the stage's process function when it has one,
            otherwise passes the data through unmodified
        """
        if hasattr(self.stage, "process"):
            return self.stage.process(i)
        return i

    def elaborate(self, platform):
        m = MultiInControlBase.elaborate(self, platform)

        m.submodules += self.p_mux

        # need an array of buffer registers conforming to *input* spec
        r_data = []
        data_valid = []
        p_valid_i = []
        n_ready_in = []
        p_len = len(self.p)
        for i in range(p_len):
            name = 'r_%d' % i
            r = _spec(self.stage.ispec, name) # input type
            r_data.append(r)
            data_valid.append(Signal(name="data_valid", reset_less=True))
            p_valid_i.append(Signal(name="p_valid_i", reset_less=True))
            n_ready_in.append(Signal(name="n_ready_in", reset_less=True))
            if hasattr(self.stage, "setup"):
                self.stage.setup(m, r)
        if len(r_data) > 1:
            r_data = Array(r_data)
            p_valid_i = Array(p_valid_i)
            n_ready_in = Array(n_ready_in)
            data_valid = Array(data_valid)

        nirn = Signal(reset_less=True)
        m.d.comb += nirn.eq(~self.n.ready_i)
        mid = self.p_mux.m_id
        # defaults for all inputs: invalid / not-ready ...
        for i in range(p_len):
            m.d.comb += data_valid[i].eq(0)
            m.d.comb += n_ready_in[i].eq(1)
            m.d.comb += p_valid_i[i].eq(0)
            m.d.comb += self.p[i].ready_o.eq(0)
        # ... then the arbiter-selected input is driven for real
        m.d.comb += p_valid_i[mid].eq(self.p_mux.active)
        m.d.comb += self.p[mid].ready_o.eq(~data_valid[mid] | self.n.ready_i)
        m.d.comb += n_ready_in[mid].eq(nirn & data_valid[mid])
        # bug fix: removed dead "anyvalid = Signal(i, reset_less=True)" --
        # it used the stale loop variable as a width and was immediately
        # rebound to the Cat() below, so the Signal was never used.
        av = []
        for i in range(p_len):
            av.append(data_valid[i])
        anyvalid = Cat(*av)
        # output is valid when any buffered input is valid
        m.d.comb += self.n.valid_o.eq(anyvalid.bool())
        m.d.comb += data_valid[mid].eq(p_valid_i[mid] | \
                                    (n_ready_in[mid] & data_valid[mid]))

        # latch (combinatorially) each input into its buffer on a transaction
        for i in range(p_len):
            vr = Signal(reset_less=True)
            m.d.comb += vr.eq(self.p[i].valid_i & self.p[i].ready_o)
            with m.If(vr):
                m.d.comb += eq(r_data[i], self.p[i].data_i)

        # processed result comes from the mux-selected buffer
        m.d.comb += eq(self.n.data_o, self.process(r_data[mid]))

        return m
+
+
class CombMuxOutPipe(CombMultiOutPipeline):
    """ multi-output pipeline whose output selection (muxid) is taken
        directly from the incoming data's "mid" field.
    """
    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        # NOTE(review): assumes stage.ispec provides a "mid" member -- confirm
        stage.m_id = self.p.data_i.mid
+
+
+
class InputPriorityArbiter(Elaboratable):
    """ arbitration module for Input-Mux pipe, based on PriorityEncoder
    """
    def __init__(self, pipe, num_rows):
        self.pipe = pipe
        self.num_rows = num_rows
        # bug fix: int(log(n)/log(2)) *floors* (and is float-fragile),
        # under-sizing m_id for non-power-of-two num_rows (e.g. 5 -> 2 bits
        # where 3 are needed).  (n-1).bit_length() is ceil(log2(n)) for
        # n >= 1, computed exactly in integer arithmetic.
        self.mmax = (self.num_rows - 1).bit_length()
        self.m_id = Signal(self.mmax, reset_less=True) # multiplex id
        self.active = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        assert len(self.pipe.p) == self.num_rows, \
                "must declare input to be same size"
        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe

        # connect priority encoder
        in_ready = []
        for i in range(self.num_rows):
            p_valid_i = Signal(reset_less=True)
            m.d.comb += p_valid_i.eq(self.pipe.p[i].valid_i_test)
            in_ready.append(p_valid_i)
        m.d.comb += pe.i.eq(Cat(*in_ready)) # array of input "valids"
        m.d.comb += self.active.eq(~pe.n)   # encoder active (one input valid)
        m.d.comb += self.m_id.eq(pe.o)      # output one active input

        return m

    def ports(self):
        return [self.m_id, self.active]
+
+
+
class PriorityCombMuxInPipe(CombMultiInPipeline):
    """ an example of how to use the combinatorial pipeline.
    """

    def __init__(self, stage, p_len=2):
        # the arbiter is handed a reference to *this* pipe so that it can
        # inspect self.p[i].valid_i_test for each of the p_len inputs
        p_mux = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_len, p_mux)
+
+
if __name__ == '__main__':

    # build a 2-way priority-muxed input pipe around the example stage
    # and emit it as RTLIL for inspection
    dut = PriorityCombMuxInPipe(ExampleStage)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_combpipe.il", "w") as f:
        f.write(vl)
--- /dev/null
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen.cli import main, verilog
+from fpadd.statemachine import FPADDBase, FPADD
+from fpadd.pipeline import FPADDMuxInOut
+
+if __name__ == "__main__":
+ if True:
+ alu = FPADD(width=32, id_wid=5, single_cycle=True)
+ main(alu, ports=alu.rs[0][0].ports() + \
+ alu.rs[0][1].ports() + \
+ alu.res[0].ports() + \
+ [alu.ids.in_mid, alu.ids.out_mid])
+ else:
+ alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
+ main(alu, ports=[alu.in_a, alu.in_b] + \
+ alu.in_t.ports() + \
+ alu.out_z.ports() + \
+ [alu.in_mid, alu.out_mid])
+
+
+ # works... but don't use, just do "python fname.py convert -t v"
+ #print (verilog.convert(alu, ports=[
+ # ports=alu.in_a.ports() + \
+ # alu.in_b.ports() + \
+ # alu.out_z.ports())
--- /dev/null
+# IEEE Floating Point Divider (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Const, Cat
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, Overflow, FPBase, FPState
+from singlepipe import eq
+
class Div:
    """ scratchpad registers for a restoring integer divide: quotient,
        divisor, dividend and remainder (each `width` bits wide) plus a
        7-bit iteration counter.
    """
    def __init__(self, width):
        self.width = width
        self.quot = Signal(width)   # quotient
        self.dor = Signal(width)    # divisor
        self.dend = Signal(width)   # dividend
        self.rem = Signal(width)    # remainder
        self.count = Signal(7)      # loop count

        self.czero = Const(0, width)

    def reset(self, m):
        """ synchronously clear quotient, remainder and the loop counter """
        clears = [self.quot.eq(self.czero),
                  self.rem.eq(self.czero),
                  self.count.eq(Const(0, 7))]
        m.d.sync += clears
+
+
class FPDIV(FPBase):
    """ IEEE-754 single-precision divider, implemented as an explicit FSM:
        operand fetch -> special cases -> normalise -> iterative restoring
        divide -> normalise/round/correct/pack -> result hand-off.
    """

    def __init__(self, width):
        FPBase.__init__(self)
        self.width = width

        self.in_a = FPOpIn(width)
        self.in_b = FPOpIn(width)
        self.out_z = FPOpOut(width)

        self.states = []

    def add_state(self, state):
        """ records a state object and returns it (for chaining) """
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPDiv
        """
        m = Module()

        # Latches
        a = FPNumIn(None, self.width, False)
        b = FPNumIn(None, self.width, False)
        z = FPNumOut(self.width, False)

        div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky

        of = Overflow()
        m.submodules.in_a = a
        m.submodules.in_b = b
        m.submodules.z = z
        m.submodules.of = of

        m.d.comb += a.v.eq(self.in_a.v)
        m.d.comb += b.v.eq(self.in_b.v)

        with m.FSM() as fsm:

            # ******
            # gets operand a

            with m.State("get_a"):
                res = self.get_op(m, self.in_a, a, "get_b")
                m.d.sync += eq([a, self.in_a.ready_o], res)

            # ******
            # gets operand b

            with m.State("get_b"):
                res = self.get_op(m, self.in_b, b, "special_cases")
                m.d.sync += eq([b, self.in_b.ready_o], res)

            # ******
            # special cases: NaNs, infs, zeros, denormalised
            # NOTE: some of these are unique to div. see "Special Operations"
            # https://steve.hollasch.net/cgindex/coding/ieeefloat.html

            with m.State("special_cases"):

                # if a is NaN or b is NaN return NaN
                with m.If(a.is_nan | b.is_nan):
                    m.next = "put_z"
                    m.d.sync += z.nan(1)

                # if a is Inf and b is Inf return NaN
                with m.Elif(a.is_inf & b.is_inf):
                    m.next = "put_z"
                    m.d.sync += z.nan(1)

                # if a is inf return inf (or NaN if b is zero)
                with m.Elif(a.is_inf):
                    m.next = "put_z"
                    m.d.sync += z.inf(a.s ^ b.s)

                # if b is inf return zero
                with m.Elif(b.is_inf):
                    m.next = "put_z"
                    m.d.sync += z.zero(a.s ^ b.s)

                # if a is zero return zero (or NaN if b is zero)
                with m.Elif(a.is_zero):
                    m.next = "put_z"
                    # if b is zero return NaN
                    with m.If(b.is_zero):
                        m.d.sync += z.nan(1)
                    with m.Else():
                        m.d.sync += z.zero(a.s ^ b.s)

                # if b is zero return Inf
                with m.Elif(b.is_zero):
                    m.next = "put_z"
                    m.d.sync += z.inf(a.s ^ b.s)

                # Denormalised Number checks
                with m.Else():
                    m.next = "normalise_a"
                    self.denormalise(m, a)
                    self.denormalise(m, b)

            # ******
            # normalise_a

            with m.State("normalise_a"):
                self.op_normalise(m, a, "normalise_b")

            # ******
            # normalise_b

            with m.State("normalise_b"):
                self.op_normalise(m, b, "divide_0")

            # ******
            # First stage of divide. initialise state

            with m.State("divide_0"):
                m.next = "divide_1"
                m.d.sync += [
                    z.s.eq(a.s ^ b.s), # sign
                    z.e.eq(a.e - b.e), # exponent
                    div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky
                    div.dor.eq(b.m),
                ]
                div.reset(m)

            # ******
            # Second stage of divide.

            with m.State("divide_1"):
                m.next = "divide_2"
                # shift quotient up; shift dividend MSB into remainder
                m.d.sync += [
                    div.quot.eq(div.quot << 1),
                    div.rem.eq(Cat(div.dend[-1], div.rem[0:])),
                    div.dend.eq(div.dend << 1),
                ]

            # ******
            # Third stage of divide.
            # This stage ends by jumping out to divide_3
            # However it defaults to jumping to divide_1 (which comes back here)

            with m.State("divide_2"):
                # restoring step: subtract divisor when remainder >= divisor
                with m.If(div.rem >= div.dor):
                    m.d.sync += [
                        div.quot[0].eq(1),
                        div.rem.eq(div.rem - div.dor),
                    ]
                with m.If(div.count == div.width-2):
                    m.next = "divide_3"
                with m.Else():
                    m.next = "divide_1"
                m.d.sync += [
                    div.count.eq(div.count + 1),
                ]

            # ******
            # Fourth stage of divide.

            with m.State("divide_3"):
                m.next = "normalise_1"
                # top of quotient is the mantissa; bottom 3 bits feed
                # guard/round/sticky for rounding
                m.d.sync += [
                    z.m.eq(div.quot[3:]),
                    of.guard.eq(div.quot[2]),
                    of.round_bit.eq(div.quot[1]),
                    of.sticky.eq(div.quot[0] | (div.rem != 0))
                ]

            # ******
            # First stage of normalisation.

            with m.State("normalise_1"):
                self.normalise_1(m, z, of, "normalise_2")

            # ******
            # Second stage of normalisation.

            with m.State("normalise_2"):
                self.normalise_2(m, z, of, "round")

            # ******
            # rounding stage

            with m.State("round"):
                self.roundz(m, z, of.roundz)
                m.next = "corrections"

            # ******
            # correction stage

            with m.State("corrections"):
                self.corrections(m, z, "pack")

            # ******
            # pack stage

            with m.State("pack"):
                self.pack(m, z, "put_z")

            # ******
            # put_z stage

            with m.State("put_z"):
                self.put_z(m, z, self.out_z, "get_a")

        return m
+
+
+if __name__ == "__main__":
+ alu = FPDIV(width=32)
+ main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
+
+
+ # works... but don't use, just do "python fname.py convert -t v"
+ #print (verilog.convert(alu, ports=[
+ # ports=alu.in_a.ports() + \
+ # alu.in_b.ports() + \
+ # alu.out_z.ports())
--- /dev/null
+""" nmigen operator functions / utils
+
+ eq:
+ --
+
+ a strategically very important function that is identical in function
+ to nmigen's Signal.eq function, except it may take objects, or a list
+ of objects, or a tuple of objects, and where objects may also be
+ Records.
+"""
+
+from nmigen import Signal, Cat, Const, Mux, Module, Value, Elaboratable
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.fifo import SyncFIFO, SyncFIFOBuffered
+from nmigen.hdl.ast import ArrayProxy
+from nmigen.hdl.rec import Record, Layout
+
+from abc import ABCMeta, abstractmethod
+from collections.abc import Sequence, Iterable
+from collections import OrderedDict
+from queue import Queue
+import inspect
+
+
class Visitor2:
    """ a helper class for iterating twin-argument compound data structures.

        Record is a special (unusual, recursive) case, where the input may be
        specified as a dictionary (which may contain further dictionaries,
        recursively), where the field names of the dictionary must match
        the Record's field spec. Alternatively, an object with the same
        member names as the Record may be assigned: it does not have to
        *be* a Record.

        ArrayProxy is also special-cased, it's a bit messy: whilst ArrayProxy
        has an eq function, the object being assigned to it (e.g. a python
        object) might not. despite the *input* having an eq function,
        that doesn't help us, because it's the *ArrayProxy* that's being
        assigned to. so.... we cheat. use the ports() function of the
        python object, enumerate them, find out the list of Signals that way,
        and assign them.
    """
    def iterator2(self, o, i):
        """ yields (dest, src) pairs by recursively walking o and i """
        if isinstance(o, dict):
            yield from self.dict_iter2(o, i)
            # bug fix: previously fell through and (via the Sequence wrap
            # below) yielded the raw (dict, i) pair a second time
            return

        if not isinstance(o, Sequence):
            o, i = [o], [i]
        for (ao, ai) in zip(o, i):
            #print ("visit", fn, ao, ai)
            if isinstance(ao, Record):
                yield from self.record_iter2(ao, ai)
            elif isinstance(ao, ArrayProxy) and not isinstance(ai, Value):
                yield from self.arrayproxy_iter2(ao, ai)
            else:
                yield (ao, ai)

    def dict_iter2(self, o, i):
        """ pairs each dict value with the matching entry of i """
        for (k, v) in o.items():
            print ("d-iter", v, i[k])
            yield (v, i[k])
        # bug fix: removed trailing "return res" -- res was never defined,
        # so exhausting the generator raised NameError

    def _not_quite_working_with_all_unit_tests_record_iter2(self, ao, ai):
        print ("record_iter2", ao, ai, type(ao), type(ai))
        if isinstance(ai, Value):
            if isinstance(ao, Sequence):
                ao, ai = [ao], [ai]
            for o, i in zip(ao, ai):
                yield (o, i)
            return
        for idx, (field_name, field_shape, _) in enumerate(ao.layout):
            if isinstance(field_shape, Layout):
                val = ai.fields
            else:
                val = ai
            if hasattr(val, field_name): # check for attribute
                val = getattr(val, field_name)
            else:
                val = val[field_name] # dictionary-style specification
            yield from self.iterator2(ao.fields[field_name], val)

    def record_iter2(self, ao, ai):
        """ walks a Record dest, pulling matching members (attribute- or
            dictionary-style) out of the src
        """
        for idx, (field_name, field_shape, _) in enumerate(ao.layout):
            if isinstance(field_shape, Layout):
                val = ai.fields
            else:
                val = ai
            if hasattr(val, field_name): # check for attribute
                val = getattr(val, field_name)
            else:
                val = val[field_name] # dictionary-style specification
            yield from self.iterator2(ao.fields[field_name], val)

    def arrayproxy_iter2(self, ao, ai):
        """ walks the src's ports(), pairing each with the same-named
            member of the ArrayProxy dest (see class docstring)
        """
        for p in ai.ports():
            op = getattr(ao, p.name)
            print ("arrayproxy - p", p, p.name)
            yield from self.iterator2(op, p)
+
+
class Visitor:
    """ a helper class for iterating single-argument compound data structures.
        similar to Visitor2.
    """
    def iterate(self, i):
        """ iterate a compound structure recursively using yield
        """
        if not isinstance(i, Sequence):
            i = [i]
        for ai in i:
            #print ("iterate", ai)
            if isinstance(ai, Record):
                #print ("record", list(ai.layout))
                yield from self.record_iter(ai)
            elif isinstance(ai, ArrayProxy):
                # bug fix: was "isinstance(ai, ArrayProxy) and not
                # isinstance(ai, Value)" -- but ArrayProxy *is* a Value
                # subclass, so the branch could never trigger and
                # ArrayProxies were yielded whole instead of recursed
                # (Visitor2 tests the *other* operand against Value,
                # which is why its condition differs)
                yield from self.array_iter(ai)
            else:
                yield ai

    def record_iter(self, ai):
        """ walks a Record's layout, recursing into each member
            (attribute- or dictionary-style access)
        """
        for idx, (field_name, field_shape, _) in enumerate(ai.layout):
            if isinstance(field_shape, Layout):
                val = ai.fields
            else:
                val = ai
            if hasattr(val, field_name): # check for attribute
                val = getattr(val, field_name)
            else:
                val = val[field_name] # dictionary-style specification
            #print ("recidx", idx, field_name, field_shape, val)
            yield from self.iterate(val)

    def array_iter(self, ai):
        """ recurses into each of an ArrayProxy's ports """
        for p in ai.ports():
            yield from self.iterate(p)
+
+
def eq(o, i):
    """ makes signals equal: a helper routine which identifies if it is being
        passed a list (or tuple) of objects, or signals, or Records, and calls
        the objects' eq function.
    """
    stmts = []
    for (lhs, rhs) in Visitor2().iterator2(o, i):
        assigned = lhs.eq(rhs)
        if isinstance(assigned, Sequence):
            stmts.extend(assigned)
        else:
            stmts.append(assigned)
    return stmts
+
+
def shape(i):
    """ returns (total_bits, False): the summed bit-width of every part of
        a compound object, always reported as unsigned.
    """
    #print ("shape", i)
    total = sum(part.shape()[0] for part in list(i))
    return total, False
+
+
def cat(i):
    """ flattens a compound structure recursively using Cat
    """
    from nmigen.tools import flatten
    # list(flatten(i)) works (as of nmigen commit f22106e5), HOWEVER the
    # input may be a sequence, so Visitor().iterate is used instead
    return Cat(*Visitor().iterate(i))
+
+
--- /dev/null
+""" Example 5: Making use of PyRTL and Introspection. """
+
+from collections.abc import Sequence
+
+from nmigen import Signal
+from nmigen.hdl.rec import Record
+from nmigen import tracer
+from nmigen.compat.fhdl.bitcontainer import value_bits_sign
+from contextlib import contextmanager
+
+from nmoperator import eq
+from singlepipe import StageCls, ControlBase, BufferedHandshake
+from singlepipe import UnbufferedPipeline
+
+
# The following example (adapted from a PyRTL example) shows how python
# introspection can be used to make some interesting hardware structures.
# In particular, this example makes an N-stage pipeline structure. Any
# specific pipeline is then a derived class of SimplePipeline where methods
# with names starting with "stage" are stages, and new members with names
# not starting with "_" are to be registered for the next stage.
+
def like(value, rname, pipe, pipemode=False):
    """ returns a fresh pipeline register named *rname*, shaped like *value*:
        an ObjectProxy clone for ObjectProxy inputs, otherwise a Signal of
        matching width/sign.
    """
    if isinstance(value, ObjectProxy):
        return ObjectProxy.like(pipe, value, pipemode=pipemode,
                                name=rname, reset_less=True)
    # bug fix: removed unreachable "return Signal.like(...)" that followed
    # this return (both if/else branches already returned)
    return Signal(value_bits_sign(value), name=rname,
                  reset_less=True)
+
def get_assigns(_assigns):
    """ recursively flattens a list of assignments, expanding any
        ObjectProxy entries into their contained assignments
    """
    flat = []
    for entry in _assigns:
        if isinstance(entry, ObjectProxy):
            flat.extend(get_assigns(entry._assigns))
        else:
            flat.append(entry)
    return flat
+
+
def get_eqs(_eqs):
    """ recursively flattens a list of eq statements, expanding any
        ObjectProxy entries into their contained eqs
    """
    flat = []
    for entry in _eqs:
        if isinstance(entry, ObjectProxy):
            flat.extend(get_eqs(entry._eqs))
        else:
            flat.append(entry)
    return flat
+
+
class ObjectProxy:
    """ A proxy object whose attribute writes transparently create
        pipeline registers (via like()) and attribute reads return them.

        Plain attribute assignment (any name not starting with '_')
        creates a register in _preg_map and records/emits the eq()
        connecting it to the assigned value, depending on mode:

        * pipemode=True:  assignment goes to m.d.sync (or m.d.comb when
          syncmode=False)
        * pipemode=False, m set:   assignment goes to m.d.comb
        * pipemode=False, m=None:  assignment is deferred into _assigns
    """
    def __init__(self, m, name=None, pipemode=False, syncmode=True):
        self._m = m
        if name is None:
            # infer the variable name from the assignment site
            name = tracer.get_var_name(default=None)
        self.name = name
        self._pipemode = pipemode
        self._syncmode = syncmode
        self._eqs = {}
        self._assigns = []
        self._preg_map = {}  # attribute name -> pipeline register

    @classmethod
    def like(cls, m, value, pipemode=False, name=None, src_loc_at=0, **kwargs):
        """ Creates a new ObjectProxy mirroring the registers of `value`:
            every port of `value` gets a like() clone in the new proxy.
        """
        name = name or tracer.get_var_name(depth=2 + src_loc_at,
                                           default="$like")

        src_loc_at_1 = 1 + src_loc_at
        r = ObjectProxy(m, value.name, pipemode)
        #for a, aname in value._preg_map.items():
        #    r._preg_map[aname] = like(a, aname, m, pipemode)
        for a in value.ports():
            aname = a.name
            r._preg_map[aname] = like(a, aname, m, pipemode)
        return r

    def __repr__(self):
        subobjs = []
        for a in self.ports():
            aname = a.name
            ai = self._preg_map[aname]
            subobjs.append(repr(ai))
        return "<OP %s>" % subobjs

    def get_specs(self, liked=False):
        """ Returns a flat list of all registers, recursing into
            nested ObjectProxy instances.
        """
        res = []
        for k, v in self._preg_map.items():
            #v = like(v, k, stage._m)
            res.append(v)
            if isinstance(v, ObjectProxy):
                res += v.get_specs()
        return res

    def eq(self, i):
        """ Returns a list of eq statements connecting this proxy's
            ports to the same-named registers of proxy `i`.
        """
        print ("ObjectProxy eq", self, i)
        res = []
        for a in self.ports():
            aname = a.name
            ai = i._preg_map[aname]
            res.append(a.eq(ai))
        return res

    def ports(self):
        """ Returns the registers (Signal/ObjectProxy/Record only). """
        res = []
        for aname, a in self._preg_map.items():
            if isinstance(a, Signal) or isinstance(a, ObjectProxy) or \
               isinstance(a, Record):
                res.append(a)
        #print ("ObjectPorts", res)
        return res

    def __getattr__(self, name):
        # attribute reads return the auto-created pipeline register
        try:
            v = self._preg_map[name]
            return v
            #return like(v, name, self._m)
        except KeyError:
            raise AttributeError(
                'error, no pipeline register "%s" defined for OP %s'
                % (name, self.name))

    def __setattr__(self, name, value):
        if name.startswith('_') or name in ['name', 'ports', 'eq', 'like']:
            # do not do anything tricky with variables starting with '_'
            object.__setattr__(self, name, value)
            return
        #rname = "%s_%s" % (self.name, name)
        rname = name
        # create a register matching the assigned value and record it
        new_pipereg = like(value, rname, self._m, self._pipemode)
        self._preg_map[name] = new_pipereg
        #object.__setattr__(self, name, new_pipereg)
        if self._pipemode:
            #print ("OP pipemode", self._syncmode, new_pipereg, value)
            assign = eq(new_pipereg, value)
            if self._syncmode:
                self._m.d.sync += assign
            else:
                self._m.d.comb += assign
        elif self._m:
            #print ("OP !pipemode assign", new_pipereg, value, type(value))
            self._m.d.comb += eq(new_pipereg, value)
        else:
            # no module yet: defer the assignment for later collection
            #print ("OP !pipemode !m", new_pipereg, value, type(value))
            self._assigns += eq(new_pipereg, value)
            if isinstance(value, ObjectProxy):
                #print ("OP, defer assigns:", value._assigns)
                self._assigns += value._assigns
                # NOTE(review): _eqs is initialised as a dict in __init__
                # but .append() is called here - this branch looks like a
                # latent bug (or is never reached); confirm before relying
                self._eqs.append(value._eqs)
+
+
class PipelineStage:
    """ Pipeline builder stage with auto generation of pipeline registers.

        Attribute writes (names not starting with '_') create registers
        under the '__nextstage__' key of _preg_map; a following stage,
        constructed with prev=this_stage, picks those up (via likedict)
        as its own inputs, keyed by its stage name.
    """

    def __init__(self, name, m, prev=None, pipemode=False, ispec=None):
        self._m = m
        self._stagename = name
        self._preg_map = {'__nextstage__': {}}
        self._prev_stage = prev
        self._ispec = ispec
        if ispec:
            # explicit input spec: becomes this stage's readable registers
            self._preg_map[self._stagename] = ispec
        if prev:
            print ("prev", prev._stagename, prev._preg_map)
            #if prev._stagename in prev._preg_map:
            #    m = prev._preg_map[prev._stagename]
            #    self._preg_map[prev._stagename] = m
            if '__nextstage__' in prev._preg_map:
                # adopt the previous stage's outputs as our inputs,
                # cloned via likedict so we get fresh registers
                m = prev._preg_map['__nextstage__']
                m = likedict(m)
                self._preg_map[self._stagename] = m
                #for k, v in m.items():
                #    m[k] = like(v, k, self._m)
                print ("make current", self._stagename, m)
        self._pipemode = pipemode
        self._eqs = {}
        self._assigns = []

    def __getattribute__(self, name):
        if name.startswith('_'):
            # internal attributes bypass the register lookup
            return object.__getattribute__(self, name)
        #if name in self._preg_map['__nextstage__']:
        #    return self._preg_map['__nextstage__'][name]
        try:
            print ("getattr", name, object.__getattribute__(self, '_preg_map'))
            v = self._preg_map[self._stagename][name]
            return v
            #return like(v, name, self._m)
        except KeyError:
            raise AttributeError(
                'error, no pipeline register "%s" defined for stage %s'
                % (name, self._stagename))

    def __setattr__(self, name, value):
        if name.startswith('_'):
            # do not do anything tricky with variables starting with '_'
            object.__setattr__(self, name, value)
            return
        pipereg_id = self._stagename
        rname = 'pipereg_' + pipereg_id + '_' + name
        new_pipereg = like(value, rname, self._m, self._pipemode)
        next_stage = '__nextstage__'
        if next_stage not in self._preg_map:
            self._preg_map[next_stage] = {}
        self._preg_map[next_stage][name] = new_pipereg
        print ("setattr", name, value, self._preg_map)
        if self._pipemode:
            # defer: record eqs/assigns for AutoStage to pick up later
            self._eqs[name] = new_pipereg
            assign = eq(new_pipereg, value)
            print ("pipemode: append", new_pipereg, value, assign)
            if isinstance(value, ObjectProxy):
                print ("OP, assigns:", value._assigns)
                self._assigns += value._assigns
                self._eqs[name]._eqs = value._eqs
            #self._m.d.comb += assign
            self._assigns += assign
        elif self._m:
            # direct mode: register the transfer synchronously right away
            print ("!pipemode: assign", new_pipereg, value)
            assign = eq(new_pipereg, value)
            self._m.d.sync += assign
        else:
            # no module: defer everything
            print ("!pipemode !m: defer assign", new_pipereg, value)
            assign = eq(new_pipereg, value)
            self._eqs[name] = new_pipereg
            self._assigns += assign
            if isinstance(value, ObjectProxy):
                print ("OP, defer assigns:", value._assigns)
                self._assigns += value._assigns
                self._eqs[name]._eqs = value._eqs
+
def likelist(specs):
    """ Returns a list of pipeline registers, one like() clone per
        entry in specs (always in pipemode, with no module).
    """
    return [like(spec, spec.name, None, pipemode=True) for spec in specs]
+
def likedict(specs):
    """ Recursively clones a (possibly nested) dict of specs via like();
        a non-dict leaf is cloned directly (pipemode, no module).
    """
    if isinstance(specs, dict):
        return {key: likedict(val) for key, val in specs.items()}
    return like(specs, specs.name, None, pipemode=True)
+
+
class AutoStage(StageCls):
    """ Stage-API adapter for PipeManager-collected specs.

        * inspecs/outspecs: input/output spec dicts (cloned via likedict
          for ispec/ospec)
        * eqs:     the processing results, returned as-is from process()
        * assigns: deferred assignments (consumed by AutoPipe, not here)
    """
    def __init__(self, inspecs, outspecs, eqs, assigns):
        self.inspecs, self.outspecs = inspecs, outspecs
        self.eqs, self.assigns = eqs, assigns
        #self.o = self.ospec()
    def ispec(self): return likedict(self.inspecs)
    def ospec(self): return likedict(self.outspecs)

    def process(self, i):
        """ Returns the pre-recorded eqs (input i is ignored here:
            it is wired up in setup() instead).
        """
        print ("stage process", i)
        return self.eqs

    def setup(self, m, i):
        """ Combinatorially connects the incoming data to inspecs. """
        print ("stage setup i", i, m)
        print ("stage setup inspecs", self.inspecs)
        print ("stage setup outspecs", self.outspecs)
        print ("stage setup eqs", self.eqs)
        #self.o = self.ospec()
        m.d.comb += eq(self.inspecs, i)
        #m.d.comb += eq(self.outspecs, self.eqs)
        #m.d.comb += eq(self.o, i)
+ #m.d.comb += eq(self.o, i)
+
+
class AutoPipe(UnbufferedPipeline):
    """ An UnbufferedPipeline that additionally emits a set of deferred
        combinatorial assignments (collected by PipeManager) during
        elaboration.
    """
    def __init__(self, stage, assigns):
        UnbufferedPipeline.__init__(self, stage)
        self.assigns = assigns  # deferred assignments, added in elaborate()

    def elaborate(self, platform):
        m = UnbufferedPipeline.elaborate(self, platform)
        m.d.comb += self.assigns
        print ("assigns", self.assigns, m)
        return m
+
+
class PipeManager:
    """ Context-manager-based pipeline builder.

        Usage:
            with PipeManager(m, pipemode=True) as pipe:
                with pipe.Stage("first", ispec=...) as (p, m):
                    ...
        Each Stage() context collects specs/eqs/assigns into an AutoStage;
        on __exit__ the stages are wrapped in pipelines (BufferedHandshake
        or AutoPipe) and chained together via ControlBase.connect().
    """
    def __init__(self, m, pipemode=False, pipetype=None):
        self.m = m
        self.pipemode = pipemode
        self.pipetype = pipetype  # 'buffered' selects BufferedHandshake

    @contextmanager
    def Stage(self, name, prev=None, ispec=None):
        """ Yields (stage, module) for the with-block body; afterwards
            (in pipemode) harvests the stage's specs into an AutoStage.
        """
        if ispec:
            ispec = likedict(ispec)  # input specs: fresh registers
        print ("start stage", name, ispec)
        stage = PipelineStage(name, None, prev, self.pipemode, ispec=ispec)
        try:
            yield stage, self.m #stage._m
        finally:
            pass
        if self.pipemode:
            if stage._ispec:
                print ("use ispec", stage._ispec)
                inspecs = stage._ispec
            else:
                inspecs = self.get_specs(stage, name)
                #inspecs = likedict(inspecs)
            outspecs = self.get_specs(stage, '__nextstage__', liked=True)
            print ("stage inspecs", name, inspecs)
            print ("stage outspecs", name, outspecs)
            eqs = stage._eqs # get_eqs(stage._eqs)
            assigns = get_assigns(stage._assigns)
            print ("stage eqs", name, eqs)
            print ("stage assigns", name, assigns)
            s = AutoStage(inspecs, outspecs, eqs, assigns)
            self.stages.append(s)
        print ("end stage", name, self.pipemode, "\n")

    def get_specs(self, stage, name, liked=False):
        """ Returns the stage's spec dict for `name`.

            NOTE(review): the code after the first return is unreachable -
            it appears to be an earlier list-based implementation kept as a
            debugging fallback; confirm before removing.
        """
        return stage._preg_map[name]
        if name in stage._preg_map:
            res = []
            for k, v in stage._preg_map[name].items():
                #v = like(v, k, stage._m)
                res.append(v)
                #if isinstance(v, ObjectProxy):
                #    res += v.get_specs()
            return res
        return {}

    def __enter__(self):
        self.stages = []
        return self

    def __exit__(self, *args):
        print ("exit stage", args)
        pipes = []
        cb = ControlBase()
        for s in self.stages:
            print ("stage specs", s, s.inspecs, s.outspecs)
            if self.pipetype == 'buffered':
                p = BufferedHandshake(s)
            else:
                p = AutoPipe(s, s.assigns)
            pipes.append(p)
            self.m.submodules += p

        # chain all pipeline stages together, prev/next ready/valid/data
        self.m.d.comb += cb.connect(pipes)
+
+
class SimplePipeline:
    """ Pipeline builder with auto generation of pipeline registers.

        Subclasses define methods named stage0, stage1, ... ; _setup()
        runs them in sorted order.  Inside a stage, plain attribute
        assignment creates a sync-domain pipeline register feeding the
        *next* stage, and attribute reads fetch the register written by
        the *previous* stage.
    """

    def __init__(self, m):
        self._m = m                          # the nmigen Module
        self._pipeline_register_map = {}     # stage number -> {name: reg}
        self._current_stage_num = 0

    def _setup(self):
        """ Finds and runs all stage* methods in sorted name order. """
        stage_list = []
        for method in dir(self):
            if method.startswith('stage'):
                stage_list.append(method)
        for stage in sorted(stage_list):
            stage_method = getattr(self, stage)
            stage_method()
            self._current_stage_num += 1

    def __getattr__(self, name):
        # reads fetch the register created by the previous stage
        try:
            return self._pipeline_register_map[self._current_stage_num][name]
        except KeyError:
            raise AttributeError(
                'error, no pipeline register "%s" defined for stage %d'
                % (name, self._current_stage_num))

    def __setattr__(self, name, value):
        if name.startswith('_'):
            # do not do anything tricky with variables starting with '_'
            object.__setattr__(self, name, value)
            return
        next_stage = self._current_stage_num + 1
        pipereg_id = str(self._current_stage_num) + 'to' + str(next_stage)
        rname = 'pipereg_' + pipereg_id + '_' + name
        #new_pipereg = Signal(value_bits_sign(value), name=rname,
        #                     reset_less=True)
        if isinstance(value, ObjectProxy):
            new_pipereg = ObjectProxy.like(self._m, value,
                                           name=rname, reset_less = True)
        else:
            new_pipereg = Signal.like(value, name=rname, reset_less = True)
        if next_stage not in self._pipeline_register_map:
            self._pipeline_register_map[next_stage] = {}
        self._pipeline_register_map[next_stage][name] = new_pipereg
        # transfer into the new register on the clock edge
        self._m.d.sync += eq(new_pipereg, value)
+
--- /dev/null
+""" Example 5: Making use of PyRTL and Introspection. """
+
+from nmigen import Module, Signal, Const
+from nmigen.cli import main, verilog, rtlil
+
+
+from pipeline import SimplePipeline, ObjectProxy, PipeManager
+
+
class SimplePipelineExample(SimplePipeline):
    """ A very simple pipeline to show how registers are inferred.

        Five stages: invert, add 2, shift left by 2, invert, and feed
        the result (+3) back into _loopback.

        BUG FIX: stage2/stage4 referenced self._pipe, which is never set
        anywhere (SimplePipeline.__init__ stores the module as self._m,
        and domains are reached via self._m.d, exactly as the sibling
        ObjectBasedPipelineExample does).  Using self._m.d here.
    """

    def __init__(self, pipe):
        SimplePipeline.__init__(self, pipe)
        self._loopback = Signal(4)
        self._setup()

    def stage0(self):
        self.n = ~self._loopback

    def stage1(self):
        self.n = self.n + 2

    def stage2(self):
        localv = Signal(4)
        # was: self._pipe.comb (AttributeError - no such attribute)
        self._m.d.comb += localv.eq(2)
        self.n = self.n << localv

    def stage3(self):
        self.n = ~self.n

    def stage4(self):
        # was: self._pipe.sync (AttributeError - no such attribute)
        self._m.d.sync += self._loopback.eq(self.n + 3)
+
+
class ObjectBasedPipelineExample(SimplePipeline):
    """ A very simple pipeline to show how registers are inferred.

        Same idea as SimplePipelineExample, but demonstrates that whole
        ObjectProxy structures (with their own attributes) may also flow
        through the auto-generated pipeline registers.
    """

    def __init__(self, m):
        SimplePipeline.__init__(self, m)
        self._loopback = Signal(4)
        # a compound value: two 4-bit signals wrapped in an ObjectProxy
        o = ObjectProxy(m)
        o.a = Signal(4)
        o.b = Signal(4)
        self._obj = o
        self._setup()

    def stage0(self):
        self.n = ~self._loopback
        self.o = self._obj

    def stage1(self):
        self.n = self.n + self.o.a
        # build a fresh proxy carrying intermediate results forward
        o = ObjectProxy(self._m)
        o.c = self.n
        o.d = self.o.b + self.n + Const(5)
        self.o = o

    def stage2(self):
        localv = Signal(4)
        self._m.d.comb += localv.eq(2)
        self.n = self.n << localv
        o = ObjectProxy(self._m)
        o.e = self.n + self.o.c + self.o.d
        self.o = o

    def stage3(self):
        self.n = ~self.n
        self.o = self.o
        self.o.e = self.o.e + self.n

    def stage4(self):
        self._m.d.sync += self._loopback.eq(self.n + 3 + self.o.e)
+
+
class PipeModule:
    """ Thin elaboratable wrapper: owns a Module and builds an
        ObjectBasedPipelineExample inside it.
    """

    def __init__(self):
        module = Module()
        self.m = module
        self.p = ObjectBasedPipelineExample(module)

    def elaborate(self, platform=None):
        """ Returns the pre-built Module. """
        return self.m
+
+
class PipelineStageExample:
    """ Demonstrates the PipeManager/Stage context-manager API: three
        chained stages (invert, +2, shift) built from plain attribute
        assignments on the yielded stage objects.
    """

    def __init__(self):
        self._loopback = Signal(4, name="loopback")

    def elaborate(self, platform=None):

        m = Module()

        with PipeManager(m, pipemode=True) as pipe:

            # first stage takes its input from an explicit ispec dict
            ispec={'loopback': self._loopback}
            with pipe.Stage("first", ispec=ispec) as (p, m):
                p.n = ~p.loopback
            # subsequent stages pass the previous stage object (p) in,
            # inheriting its outputs as their inputs
            with pipe.Stage("second", p) as (p, m):
                #p.n = ~self._loopback + 2
                p.n = p.n + Const(2)
            with pipe.Stage("third", p) as (p, m):
                #p.n = ~self._loopback + 5
                localv = Signal(4)
                m.d.comb += localv.eq(2)
                p.n = p.n << localv + Const(1)
                #p.m = p.n + 2

        print (pipe.stages)

        return m
+
class PipelineStageObjectExample:
    """ PipeManager/Stage example in which ObjectProxy compound values
        flow through the pipeline alongside plain signals.
    """

    def __init__(self):
        self.loopback = Signal(4)

    def elaborate(self, platform=None):

        m = Module()

        # compound input: two 4-bit signals in a module-less proxy
        o = ObjectProxy(None, pipemode=False)
        o.a = Signal(4)
        o.b = Signal(4)
        self.obj = o

        localv2 = Signal(4)
        m.d.sync += localv2.eq(localv2 + 3)

        #m.d.comb += self.obj.a.eq(localv2 + 1)
        #m.d.sync += self._loopback.eq(localv2)

        ispec= {'loopback': self.loopback, 'obj': self.obj}
        with PipeManager(m, pipemode=True) as pipe:

            with pipe.Stage("first", ispec=ispec) as (p, m):
                p.n = ~p.loopback
                p.o = p.obj
            with pipe.Stage("second", p) as (p, m):
                #p.n = ~self.loopback + 2
                localn = Signal(4)
                m.d.comb += localn.eq(p.n)
                o = ObjectProxy(None, pipemode=False)
                o.c = localn
                o.d = p.o.b + localn + Const(5)
                p.n = localn
                p.o = o
            with pipe.Stage("third", p) as (p, m):
                #p.n = ~self._loopback + 5
                localv = Signal(4)
                m.d.comb += localv.eq(2)
                p.n = p.n << localv
                o = ObjectProxy(None, pipemode=False)
                o.e = p.n + p.o.c + p.o.d
                p.o = o

        print ("stages", pipe.stages)

        return m
+
+
class PipelineStageObjectExample2:
    """ Single-stage PipeManager example: demonstrates a list-based
        (rather than dict-based) ispec and an ObjectProxy output.
    """

    def __init__(self):
        self._loopback = Signal(4)

    def elaborate(self, platform=None):

        m = Module()

        # NOTE(review): ispec here is a list, not a dict as in the other
        # examples - confirm PipelineStage handles list specs
        ispec= [self._loopback]
        with PipeManager(m, pipemode=True) as pipe:

            with pipe.Stage("first",
                            ispec=ispec) as (p, m):
                p.n = ~self._loopback
                o = ObjectProxy(None, pipemode=False)
                o.b = ~self._loopback + Const(5)
                p.o = o

        print ("stages", pipe.stages)

        return m
+
+
+
if __name__ == "__main__":
    # convert each example to RTLIL for inspection
    example = PipeModule()
    with open("pipe_module.il", "w") as f:
        f.write(rtlil.convert(example, ports=[
            example.p._loopback,
        ]))
    example = PipelineStageExample()
    with open("pipe_stage_module.il", "w") as f:
        f.write(rtlil.convert(example, ports=[
            example._loopback,
        ]))
    #exit(0)
    example = PipelineStageObjectExample()
    with open("pipe_stage_object_module.il", "w") as f:
        f.write(rtlil.convert(example, ports=[
            example.loopback,
        ]))
--- /dev/null
+# Copyright (c) 2014 - 2019 The Regents of the University of
+# California (Regents). All Rights Reserved. Redistribution and use in
+# source and binary forms, with or without modification, are permitted
+# provided that the following conditions are met:
+# * Redistributions of source code must retain the above
+# copyright notice, this list of conditions and the following
+# two paragraphs of disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# two paragraphs of disclaimer in the documentation and/or other materials
+# provided with the distribution.
+# * Neither the name of the Regents nor the names of its contributors
+# may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
+# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
+# REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF
+# ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION
+# TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
+# MODIFICATIONS.
+
+from nmigen import Module, Signal, Memory, Mux, Elaboratable
+from nmigen.tools import bits_for
+from nmigen.cli import main
+from nmigen.lib.fifo import FIFOInterface
+
+# translated from https://github.com/freechipsproject/chisel3/blob/a4a29e29c3f1eed18f851dcf10bdc845571dfcb6/src/main/scala/chisel3/util/Decoupled.scala#L185 # noqa
+
+
class Queue(FIFOInterface, Elaboratable):
    def __init__(self, width, depth, fwft=True, pipe=False):
        """ Queue (FIFO) with pipe mode and first-write fall-through capability

        * :width: width of Queue data in/out
        * :depth: queue depth. NOTE: may be set to 0 (this is ok)
        * :fwft : first-write, fall-through mode (Chisel Queue "flow" mode)
        * :pipe : pipe mode. NOTE: this mode can cause unanticipated
                  problems. when read is enabled, so is writeable.
                  therefore if read is enabled, the data ABSOLUTELY MUST
                  be read.

        fwft mode = True basically means that the data may be transferred
        combinatorially from input to output.

        Attributes:
        * level: available free space (number of unread entries)

        din  = enq_data, writable = enq_ready, we = enq_valid
        dout = deq_data, re = deq_ready, readable = deq_valid
        """
        FIFOInterface.__init__(self, width, depth, fwft)
        self.pipe = pipe
        self.depth = depth
        self.level = Signal(bits_for(depth))

    def elaborate(self, platform):
        m = Module()

        # set up an SRAM. XXX bug in Memory: cannot create SRAM of depth 1
        ram = Memory(self.width, self.depth if self.depth > 1 else 2)
        m.submodules.ram_read = ram_read = ram.read_port(synchronous=False)
        m.submodules.ram_write = ram_write = ram.write_port()

        # convenience names, mapping FIFOInterface names onto the
        # prev/next ready/valid terminology used elsewhere in the project
        p_ready_o = self.writable
        p_valid_i = self.we
        enq_data = self.din

        n_valid_o = self.readable
        n_ready_i = self.re
        deq_data = self.dout

        # intermediaries
        ptr_width = bits_for(self.depth - 1) if self.depth > 1 else 0
        enq_ptr = Signal(ptr_width) # cyclic pointer to "insert" point (wrport)
        deq_ptr = Signal(ptr_width) # cyclic pointer to "remove" point (rdport)
        maybe_full = Signal() # not reset_less (set by sync)

        # temporaries
        do_enq = Signal(reset_less=True)
        do_deq = Signal(reset_less=True)
        ptr_diff = Signal(ptr_width)
        ptr_match = Signal(reset_less=True)
        empty = Signal(reset_less=True)
        full = Signal(reset_less=True)
        enq_max = Signal(reset_less=True)
        deq_max = Signal(reset_less=True)

        m.d.comb += [ptr_match.eq(enq_ptr == deq_ptr), # read-ptr = write-ptr
                     ptr_diff.eq(enq_ptr - deq_ptr),
                     enq_max.eq(enq_ptr == self.depth - 1),
                     deq_max.eq(deq_ptr == self.depth - 1),
                     empty.eq(ptr_match & ~maybe_full),
                     full.eq(ptr_match & maybe_full),
                     do_enq.eq(p_ready_o & p_valid_i), # write conditions ok
                     do_deq.eq(n_ready_i & n_valid_o), # read conditions ok

                     # set readable and writable (NOTE: see pipe mode below)
                     n_valid_o.eq(~empty), # cannot read if empty!
                     p_ready_o.eq(~full),  # cannot write if full!

                     # set up memory and connect to input and output
                     ram_write.addr.eq(enq_ptr),
                     ram_write.data.eq(enq_data),
                     ram_write.en.eq(do_enq),
                     ram_read.addr.eq(deq_ptr),
                     deq_data.eq(ram_read.data) # NOTE: overridden in fwft mode
                    ]

        # under write conditions, SRAM write-pointer moves on next clock
        with m.If(do_enq):
            m.d.sync += enq_ptr.eq(Mux(enq_max, 0, enq_ptr+1))

        # under read conditions, SRAM read-pointer moves on next clock
        with m.If(do_deq):
            m.d.sync += deq_ptr.eq(Mux(deq_max, 0, deq_ptr+1))

        # if read-but-not-write or write-but-not-read, maybe_full set
        with m.If(do_enq != do_deq):
            m.d.sync += maybe_full.eq(do_enq)

        # first-word fall-through: same as "flow" parameter in Chisel3 Queue
        # basically instead of relying on the Memory characteristics (which
        # in FPGAs do not have write-through), then when the queue is empty
        # take the output directly from the input, i.e. *bypass* the SRAM.
        # this done combinatorially to give the exact same characteristics
        # as Memory "write-through"... without relying on a changing API
        if self.fwft:
            with m.If(p_valid_i):
                m.d.comb += n_valid_o.eq(1)
            with m.If(empty):
                m.d.comb += deq_data.eq(enq_data)
                m.d.comb += do_deq.eq(0)
                with m.If(n_ready_i):
                    m.d.comb += do_enq.eq(0)

        # pipe mode: if next stage says it's ready (readable), we
        # *must* declare the input ready (writeable).
        if self.pipe:
            with m.If(n_ready_i):
                m.d.comb += p_ready_o.eq(1)

        # set the count (available free space), optimise on power-of-two
        if self.depth == 1 << ptr_width:  # is depth a power of 2
            m.d.comb += self.level.eq(
                Mux(maybe_full & ptr_match, self.depth, 0) | ptr_diff)
        else:
            m.d.comb += self.level.eq(Mux(ptr_match,
                                          Mux(maybe_full, self.depth, 0),
                                          Mux(deq_ptr > enq_ptr,
                                              self.depth + ptr_diff,
                                              ptr_diff)))

        return m
+
+
if __name__ == "__main__":
    # two single-entry queues, exercised side-by-side for conversion
    reg_stage = Queue(1, 1, pipe=True)
    break_ready_chain_stage = Queue(1, 1, pipe=True, fwft=True)
    m = Module()
    ports = []

    def queue_ports(queue, name_prefix):
        """ Creates named mirror signals for a queue's outputs (driven
            from the queue) and inputs (driving the queue), and returns
            them for use as top-level ports.
        """
        retval = []
        for name in ["level",
                     "dout",
                     "readable",
                     "writable"]:
            port = getattr(queue, name)
            signal = Signal(port.shape(), name=name_prefix+name)
            m.d.comb += signal.eq(port)
            retval.append(signal)
        for name in ["re",
                     "din",
                     "we"]:
            port = getattr(queue, name)
            signal = Signal(port.shape(), name=name_prefix+name)
            m.d.comb += port.eq(signal)
            retval.append(signal)
        return retval

    m.submodules.reg_stage = reg_stage
    ports += queue_ports(reg_stage, "reg_stage_")
    m.submodules.break_ready_chain_stage = break_ready_chain_stage
    ports += queue_ports(break_ready_chain_stage, "break_ready_chain_stage_")
    main(m, ports=ports)
--- /dev/null
+from nmigen import Module, Signal, Mux, Const, Elaboratable
+from nmigen.hdl.rec import Record, Layout, DIR_NONE
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen.compat.fhdl.bitcontainer import value_bits_sign
+from singlepipe import cat, RecordObject
+
+
class RecordTest(Elaboratable):
    """ Tests that cat() flattens a nested RecordObject (r1 containing
        r2 and r3) into a single 48-bit value driven onto sig123.

        CONSISTENCY FIX: now derives from Elaboratable, matching the
        sibling RecordTest2 in this file (Elaboratable is already
        imported at the top of the file).
    """

    def __init__(self):
        self.r1 = RecordObject()
        self.r1.sig1 = Signal(16)
        self.r1.r2 = RecordObject()
        self.r1.r2.sig2 = Signal(16)
        self.r1.r3 = RecordObject()
        self.r1.r3.sig3 = Signal(16)
        self.sig123 = Signal(48)

    def elaborate(self, platform):
        m = Module()

        # mirror the individual fields (demonstrates field access)
        sig1 = Signal(16)
        m.d.comb += sig1.eq(self.r1.sig1)
        sig2 = Signal(16)
        m.d.comb += sig2.eq(self.r1.r2.sig2)

        print (self.r1.fields)
        print (self.r1.shape())
        print ("width", len(self.r1))
        # flatten the whole record into one value
        m.d.comb += self.sig123.eq(cat(self.r1))

        return m
+
+
def testbench(dut):
    """ Drives the RecordTest fields and checks that the flattened
        sig123 output matches the concatenation of sig1/sig2/sig3.
    """
    # drive the three record fields
    yield dut.r1.sig1.eq(5)
    yield dut.r1.r2.sig2.eq(10)
    yield dut.r1.r3.sig3.eq(1)

    # read back what was driven
    v1 = yield dut.r1.sig1
    assert v1 == 5
    v2 = yield dut.r1.r2.sig2
    assert v2 == 10

    yield

    # flattened output: 0x0001_000a_0005 == sig3 | sig2 | sig1
    flat = yield dut.sig123
    print ("sig123", hex(flat))
    assert flat == 0x1000a0005
+
+
+
class RecordTest2(Elaboratable):
    """ The reverse of RecordTest: drives a flat 48-bit value INTO the
        flattened (cat'd) nested record, so each field picks up its
        16-bit slice.
    """

    def __init__(self):
        outer = RecordObject()
        outer.sig1 = Signal(16)
        outer.r2 = RecordObject()
        outer.r2.sig2 = Signal(16)
        outer.r3 = RecordObject()
        outer.r3.sig3 = Signal(16)
        self.r1 = outer
        self.sig123 = Signal(48)

    def elaborate(self, platform):
        m = Module()
        # note direction: record fields are the assignment *target* here
        m.d.comb += cat(self.r1).eq(self.sig123)
        return m
+
+
def testbench2(dut):
    """ Drives the flat 48-bit value and checks each record field
        receives its 16-bit slice.
    """
    yield dut.sig123.eq(0x1000a0005)

    yield

    # slices: low 16 bits -> sig1, middle -> sig2, high -> sig3
    for field, expected in ((dut.r1.sig1, 5),
                            (dut.r1.r2.sig2, 10),
                            (dut.r1.r3.sig3, 1)):
        value = yield field
        assert value == expected
+
+
+
+######################################################################
+# Unit Tests
+######################################################################
+
if __name__ == '__main__':
    # test 1: record fields flattened OUT to a single value
    print ("test 1")
    dut = RecordTest()
    run_simulation(dut, testbench(dut), vcd_name="test_record1.vcd")
    vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2])
    with open("test_record1.il", "w") as f:
        f.write(vl)

    # test 2: single value sliced IN to record fields
    print ("test 2")
    dut = RecordTest2()
    run_simulation(dut, testbench2(dut), vcd_name="test_record2.vcd")
    vl = rtlil.convert(dut, ports=[dut.sig123, dut.r1.sig1, dut.r1.r2.sig2])
    with open("test_record2.il", "w") as f:
        f.write(vl)
+
--- /dev/null
+from nmigen import Signal, Cat, Const, Mux, Module
+
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
+from fpbase import MultiShiftRMerge
+
class ReservationStationRow:

    def __init__(self, width, id_wid):
        """ Reservation Station row

            * width:  bit-width of IEEE754. supported: 16, 32, 64
            * id_wid: an identifier to be passed through to the
              FunctionUnit
        """
        self.width = width
        self.id_wid = id_wid
        # operand inputs and result output, all `width` bits wide
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = Signal(width)

    def elaborate(self, platform=None):
        """ creates the (currently empty) HDL code-fragment for
            ReservationStationRow
        """
        return Module()
+
+
if __name__ == "__main__":
    rs = ReservationStationRow(width=32, id_wid=Const(1, 4))
    # BUG FIX: previously called main(alu, ...) with an undefined name
    # `alu` and a missing closing parenthesis (SyntaxError); the object
    # being converted is `rs`.
    main(rs, ports=[rs.in_a, rs.in_b, rs.out_z])

    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(rs,
    #    ports=rs.in_a.ports() + \
    #          rs.in_b.ports() + \
    #          rs.out_z.ports()))
--- /dev/null
+""" Pipeline API. For multi-input and multi-output variants, see multipipe.
+
+ Associated development bugs:
+ * http://bugs.libre-riscv.org/show_bug.cgi?id=64
+ * http://bugs.libre-riscv.org/show_bug.cgi?id=57
+
+ Important: see Stage API (stageapi.py) in combination with below
+
+ RecordBasedStage:
+ ----------------
+
+ A convenience class that takes an input shape, output shape, a
+ "processing" function and an optional "setup" function. Honestly
+ though, there's not much more effort to just... create a class
+ that returns a couple of Records (see ExampleAddRecordStage in
+ examples).
+
+ PassThroughStage:
+ ----------------
+
+ A convenience class that takes a single function as a parameter,
+ that is chain-called to create the exact same input and output spec.
+ It has a process() function that simply returns its input.
+
+ Instances of this class are completely redundant if handed to
+ StageChain, however when passed to UnbufferedPipeline they
+ can be used to introduce a single clock delay.
+
+ ControlBase:
+ -----------
+
+ The base class for pipelines. Contains previous and next ready/valid/data.
+ Also has an extremely useful "connect" function that can be used to
+ connect a chain of pipelines and present the exact same prev/next
+ ready/valid/data API.
+
+ Note: pipelines basically do not become pipelines as such until
+ handed to a derivative of ControlBase. ControlBase itself is *not*
+ strictly considered a pipeline class. Wishbone and AXI4 (master or
    slave) could be derived from ControlBase, for example.

    UnbufferedPipeline:
+ ------------------
+
+ A simple stalling clock-synchronised pipeline that has no buffering
+ (unlike BufferedHandshake). Data flows on *every* clock cycle when
+ the conditions are right (this is nominally when the input is valid
+ and the output is ready).
+
+ A stall anywhere along the line will result in a stall back-propagating
+ down the entire chain. The BufferedHandshake by contrast will buffer
+ incoming data, allowing previous stages one clock cycle's grace before
+ also having to stall.
+
+ An advantage of the UnbufferedPipeline over the Buffered one is
+ that the amount of logic needed (number of gates) is greatly
+ reduced (no second set of buffers basically)
+
+ The disadvantage of the UnbufferedPipeline is that the valid/ready
+ logic, if chained together, is *combinatorial*, resulting in
+ progressively larger gate delay.
+
+ PassThroughHandshake:
+ ------------------
+
+ A Control class that introduces a single clock delay, passing its
+ data through unaltered. Unlike RegisterPipeline (which relies
+ on UnbufferedPipeline and PassThroughStage) it handles ready/valid
+ itself.
+
+ RegisterPipeline:
+ ----------------
+
+ A convenience class that, because UnbufferedPipeline introduces a single
+ clock delay, when its stage is a PassThroughStage, it results in a Pipeline
+ stage that, duh, delays its (unmodified) input by one clock cycle.
+
+ BufferedHandshake:
+ ----------------
+
+ nmigen implementation of buffered pipeline stage, based on zipcpu:
+ https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
+
+ this module requires quite a bit of thought to understand how it works
+ (and why it is needed in the first place). reading the above is
+ *strongly* recommended.
+
+ unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
+ the STB / ACK signals to raise and lower (on separate clocks) before
    data may proceed (thus only allowing one piece of data to proceed
+ on *ALTERNATE* cycles), the signalling here is a true pipeline
+ where data will flow on *every* clock when the conditions are right.
+
+ input acceptance conditions are when:
+ * incoming previous-stage strobe (p.valid_i) is HIGH
+ * outgoing previous-stage ready (p.ready_o) is LOW
+
+ output transmission conditions are when:
+ * outgoing next-stage strobe (n.valid_o) is HIGH
+ * outgoing next-stage ready (n.ready_i) is LOW
+
+ the tricky bit is when the input has valid data and the output is not
+ ready to accept it. if it wasn't for the clock synchronisation, it
+ would be possible to tell the input "hey don't send that data, we're
+ not ready". unfortunately, it's not possible to "change the past":
+ the previous stage *has no choice* but to pass on its data.
+
+ therefore, the incoming data *must* be accepted - and stored: that
+ is the responsibility / contract that this stage *must* accept.
+ on the same clock, it's possible to tell the input that it must
+ not send any more data. this is the "stall" condition.
+
+ we now effectively have *two* possible pieces of data to "choose" from:
+ the buffered data, and the incoming data. the decision as to which
+ to process and output is based on whether we are in "stall" or not.
+ i.e. when the next stage is no longer ready, the output comes from
+ the buffer if a stall had previously occurred, otherwise it comes
+ direct from processing the input.
+
+ this allows us to respect a synchronous "travelling STB" with what
+ dan calls a "buffered handshake".
+
+ it's quite a complex state machine!
+
+ SimpleHandshake
+ ---------------
+
+ Synchronised pipeline, Based on:
+ https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v
+"""
+
+from nmigen import Signal, Mux, Module, Elaboratable
+from nmigen.cli import verilog, rtlil
+from nmigen.hdl.rec import Record
+
+from queue import Queue
+import inspect
+
+from iocontrol import (PrevControl, NextControl, Object, RecordObject)
+from stageapi import (_spec, StageCls, Stage, StageChain, StageHelper)
+import nmoperator
+
+
class RecordBasedStage(Stage):
    """ convenience class which provides a Records-based layout.
        honestly it's a lot easier just to create a direct Records-based
        class (see ExampleAddRecordStage)

        * in_shape / out_shape: Record layouts for ispec/ospec
        * processfn: called with the input data, returns the output
        * setupfn:   optional, called with (m, i) at elaboration time

        BUG FIX: process() and setup() previously spelled their first
        parameter "seif" while their bodies referenced "self", which
        would raise NameError on every call.
    """
    def __init__(self, in_shape, out_shape, processfn, setupfn=None):
        self.in_shape = in_shape
        self.out_shape = out_shape
        self.__process = processfn
        self.__setup = setupfn
    def ispec(self): return Record(self.in_shape)
    def ospec(self): return Record(self.out_shape)
    def process(self, i): return self.__process(i)
    def setup(self, m, i): return self.__setup(m, i)
+
+
class PassThroughStage(StageCls):
    """ a pass-through stage with its input data spec identical to its output,
        and "passes through" its data from input to output (does nothing).

        use this basically to explicitly make any data spec Stage-compliant.
        (many APIs would potentially use a static "wrap" method in e.g.
        StageCls to achieve a similar effect)
    """

    def __init__(self, iospecfn):
        # one factory serves both input and output specs
        self.iospecfn = iospecfn

    def ispec(self):
        return self.iospecfn()

    def ospec(self):
        return self.iospecfn()
+
+
+class ControlBase(StageHelper, Elaboratable):
+ """ Common functions for Pipeline API. Note: a "pipeline stage" only
+ exists (conceptually) when a ControlBase derivative is handed
+ a Stage (combinatorial block)
+
+ NOTE: ControlBase derives from StageHelper, making it accidentally
+ compliant with the Stage API. Using those functions directly
+ *BYPASSES* a ControlBase instance ready/valid signalling, which
+ clearly should not be done without a really, really good reason.
+ """
    def __init__(self, stage=None, in_multi=None, stage_ctl=False):
        """ Base class containing ready/valid/data to previous and next stages

            * p: contains ready/valid to the previous stage
            * n: contains ready/valid to the next stage

            Except when calling Controlbase.connect(), user must also:
            * add data_i member to PrevControl (p) and
            * add data_o member to NextControl (n)
            Calling ControlBase._new_data is a good way to do that.
        """
        StageHelper.__init__(self, stage)

        # set up input and output IO ACK (prev/next ready/valid)
        self.p = PrevControl(in_multi, stage_ctl)
        self.n = NextControl(stage_ctl)

        # set up the input and output data (only when a stage is given:
        # connect() allocates them later otherwise)
        if stage is not None:
            self._new_data("data")
+
+ def _new_data(self, name):
+ """ allocates new data_i and data_o
+ """
+ self.p.data_i, self.n.data_o = self.new_specs(name)
+
+ @property
+ def data_r(self):
+ return self.process(self.p.data_i)
+
+ def connect_to_next(self, nxt):
+ """ helper function to connect to the next stage data/valid/ready.
+ """
+ return self.n.connect_to_next(nxt.p)
+
+ def _connect_in(self, prev):
+ """ internal helper function to connect stage to an input source.
+ do not use to connect stage-to-stage!
+ """
+ return self.p._connect_in(prev.p)
+
+ def _connect_out(self, nxt):
+ """ internal helper function to connect stage to an output source.
+ do not use to connect stage-to-stage!
+ """
+ return self.n._connect_out(nxt.n)
+
+ def connect(self, pipechain):
+ """ connects a chain (list) of Pipeline instances together and
+ links them to this ControlBase instance:
+
+ in <----> self <---> out
+ | ^
+ v |
+ [pipe1, pipe2, pipe3, pipe4]
+ | ^ | ^ | ^
+ v | v | v |
+ out---in out--in out---in
+
+ Also takes care of allocating data_i/data_o, by looking up
+ the data spec for each end of the pipechain. i.e It is NOT
+ necessary to allocate self.p.data_i or self.n.data_o manually:
+ this is handled AUTOMATICALLY, here.
+
+ Basically this function is the direct equivalent of StageChain,
+ except that unlike StageChain, the Pipeline logic is followed.
+
+ Just as StageChain presents an object that conforms to the
+ Stage API from a list of objects that also conform to the
+ Stage API, an object that calls this Pipeline connect function
+ has the exact same pipeline API as the list of pipline objects
+ it is called with.
+
+ Thus it becomes possible to build up larger chains recursively.
+ More complex chains (multi-input, multi-output) will have to be
+ done manually.
+
+ Argument:
+
+ * :pipechain: - a sequence of ControlBase-derived classes
+ (must be one or more in length)
+
+ Returns:
+
+ * a list of eq assignments that will need to be added in
+ an elaborate() to m.d.comb
+ """
+ assert len(pipechain) > 0, "pipechain must be non-zero length"
+ assert self.stage is None, "do not use connect with a stage"
+ eqs = [] # collated list of assignment statements
+
+ # connect inter-chain
+ for i in range(len(pipechain)-1):
+ pipe1 = pipechain[i] # earlier
+ pipe2 = pipechain[i+1] # later (by 1)
+ eqs += pipe1.connect_to_next(pipe2) # earlier n to later p
+
+ # connect front and back of chain to ourselves
+ front = pipechain[0] # first in chain
+ end = pipechain[-1] # last in chain
+ self.set_specs(front, end) # sets up ispec/ospec functions
+ self._new_data("chain") # NOTE: REPLACES existing data
+ eqs += front._connect_in(self) # front p to our p
+ eqs += end._connect_out(self) # end n to our n
+
+ return eqs
+
+ def set_input(self, i):
+ """ helper function to set the input data (used in unit tests)
+ """
+ return nmoperator.eq(self.p.data_i, i)
+
+ def __iter__(self):
+ yield from self.p # yields ready/valid/data (data also gets yielded)
+ yield from self.n # ditto
+
+ def ports(self):
+ return list(self)
+
+ def elaborate(self, platform):
+ """ handles case where stage has dynamic ready/valid functions
+ """
+ m = Module()
+ m.submodules.p = self.p
+ m.submodules.n = self.n
+
+ self.setup(m, self.p.data_i)
+
+ if not self.p.stage_ctl:
+ return m
+
+ # intercept the previous (outgoing) "ready", combine with stage ready
+ m.d.comb += self.p.s_ready_o.eq(self.p._ready_o & self.stage.d_ready)
+
+ # intercept the next (incoming) "ready" and combine it with data valid
+ sdv = self.stage.d_valid(self.n.ready_i)
+ m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv)
+
+ return m
+
+
class BufferedHandshake(ControlBase):
    """ buffered pipeline stage.  data and strobe signals travel in sync.
        if ever the input is ready and the output is not, processed data
        is shunted in a temporary register.

        Argument: stage.  see Stage API above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |
                            process --->----^
                              |             |
                              +-- r_data ->-+

        input data p.data_i is read (only), is processed and goes into an
        intermediate result store [process()].  this is updated combinatorially.

        in a non-stall condition, the intermediate result will go into the
        output (update_output).  however if ever there is a stall, it goes
        into r_data instead [update_buffer()].

        when the non-stall condition is released, r_data is the first
        to be transferred to the output [flush_buffer()], and the stall
        condition cleared.

        on the next cycle (as long as stall is not raised again) the
        input may begin to be processed and transferred directly to output.
    """

    def elaborate(self, platform):
        self.m = ControlBase.elaborate(self, platform)

        # result: combinatorially-processed input; r_data: the stall buffer
        result = _spec(self.stage.ospec, "r_tmp")
        r_data = _spec(self.stage.ospec, "r_data")

        # establish some combinatorial temporaries.  naming scheme:
        # "nir" = n.ready_i, "por" = p._ready_o, "piv" = p.valid_i,
        # "nov"/"novn" = n.valid_o (inverted), trailing "_n"/"n" = negated
        o_n_validn = Signal(reset_less=True)
        n_ready_i = Signal(reset_less=True, name="n_i_rdy_data")
        nir_por = Signal(reset_less=True)
        nir_por_n = Signal(reset_less=True)
        p_valid_i = Signal(reset_less=True)
        nir_novn = Signal(reset_less=True)
        nirn_novn = Signal(reset_less=True)
        por_pivn = Signal(reset_less=True)
        npnn = Signal(reset_less=True)
        self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test),
                          o_n_validn.eq(~self.n.valid_o),
                          n_ready_i.eq(self.n.ready_i_test),
                          nir_por.eq(n_ready_i & self.p._ready_o),
                          nir_por_n.eq(n_ready_i & ~self.p._ready_o),
                          nir_novn.eq(n_ready_i | o_n_validn),
                          nirn_novn.eq(~n_ready_i & o_n_validn),
                          npnn.eq(nir_por | nirn_novn),
                          por_pivn.eq(self.p._ready_o & ~p_valid_i)
                          ]

        # store result of processing in combinatorial temporary
        self.m.d.comb += nmoperator.eq(result, self.data_r)

        # if not in stall condition, update the temporary register
        with self.m.If(self.p.ready_o): # not stalled
            self.m.d.sync += nmoperator.eq(r_data, result) # update buffer

        # data pass-through conditions
        with self.m.If(npnn):
            data_o = self._postprocess(result) # XXX TBD, does nothing right now
            self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid
                              nmoperator.eq(self.n.data_o, data_o), # update out
                              ]
        # buffer flush conditions (NOTE: can override data passthru conditions:
        # this If comes *after* the one above, so its assignments win)
        with self.m.If(nir_por_n): # not stalled
            # Flush the [already processed] buffer to the output port.
            data_o = self._postprocess(r_data) # XXX TBD, does nothing right now
            self.m.d.sync += [self.n.valid_o.eq(1), # reg empty
                              nmoperator.eq(self.n.data_o, data_o), # flush
                              ]
        # output ready conditions
        self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn)

        return self.m
+
+
class SimpleHandshake(ControlBase):
    """ simple handshake control.  data and strobe signals travel in sync.
        implements the protocol used by Wishbone and AXI4.

        Argument: stage.  see Stage API above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |
                              +--process->--^
        Truth Table

        Inputs  Temporary  Output Data
        -------   ----------  -----  ----
        P P N N  PiV& ~NiR&  N P
        i o i o  PoR  NoV    o o
        V R R V              V R

        -------   -    -     - -
        0 0 0 0   0    0     >0 0    reg
        0 0 0 1   0    1     >1 0    reg
        0 0 1 0   0    0      0 1    process(data_i)
        0 0 1 1   0    0      0 1    process(data_i)
        -------   -    -     - -
        0 1 0 0   0    0     >0 0    reg
        0 1 0 1   0    1     >1 0    reg
        0 1 1 0   0    0      0 1    process(data_i)
        0 1 1 1   0    0      0 1    process(data_i)
        -------   -    -     - -
        1 0 0 0   0    0     >0 0    reg
        1 0 0 1   0    1     >1 0    reg
        1 0 1 0   0    0      0 1    process(data_i)
        1 0 1 1   0    0      0 1    process(data_i)
        -------   -    -     - -
        1 1 0 0   1    0      1 0    process(data_i)
        1 1 0 1   1    1      1 0    process(data_i)
        1 1 1 0   1    0      1 1    process(data_i)
        1 1 1 1   1    0      1 1    process(data_i)
        -------   -    -     - -
    """

    def elaborate(self, platform):
        self.m = m = ControlBase.elaborate(self, platform)

        # r_busy is the registered "output valid" (see comb assignment below)
        r_busy = Signal()
        result = _spec(self.stage.ospec, "r_tmp")

        # establish some combinatorial temporaries
        n_ready_i = Signal(reset_less=True, name="n_i_rdy_data")
        p_valid_i_p_ready_o = Signal(reset_less=True)
        p_valid_i = Signal(reset_less=True)
        m.d.comb += [p_valid_i.eq(self.p.valid_i_test),
                     n_ready_i.eq(self.n.ready_i_test),
                     p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o),
                     ]

        # store result of processing in combinatorial temporary
        m.d.comb += nmoperator.eq(result, self.data_r)

        # previous valid and ready: accept (and register) new data
        with m.If(p_valid_i_p_ready_o):
            data_o = self._postprocess(result) # XXX TBD, does nothing right now
            m.d.sync += [r_busy.eq(1), # output valid
                         nmoperator.eq(self.n.data_o, data_o), # update output
                         ]
        # previous invalid or not ready, however next is accepting
        with m.Elif(n_ready_i):
            data_o = self._postprocess(result) # XXX TBD, does nothing right now
            m.d.sync += [nmoperator.eq(self.n.data_o, data_o)]
            # TODO: could still send data here (if there was any)
            #m.d.sync += self.n.valid_o.eq(0) # ...so set output invalid
            m.d.sync += r_busy.eq(0) # ...so set output invalid

        m.d.comb += self.n.valid_o.eq(r_busy)
        # if next is ready, so is previous
        m.d.comb += self.p._ready_o.eq(n_ready_i)

        return self.m
+
+
class UnbufferedPipeline(ControlBase):
    """ A simple pipeline stage with single-clock synchronisation
        and two-way valid/ready synchronised signalling.

        Note that a stall in one stage will result in the entire pipeline
        chain stalling.

        Also that unlike BufferedHandshake, the valid/ready signalling does NOT
        travel synchronously with the data: the valid/ready signalling
        combines in a *combinatorial* fashion.  Therefore, a long pipeline
        chain will lengthen propagation delays.

        Argument: stage.  see Stage API, above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |
                            r_data        result
                              |             |
                              +--process ->-+

        Attributes:
        -----------
        p.data_i : StageInput, shaped according to ispec
            The pipeline input
        p.data_o : StageOutput, shaped according to ospec
            The pipeline output
        r_data : input_shape according to ispec
            A temporary (buffered) copy of a prior (valid) input.
            This is HELD if the output is not ready.  It is updated
            SYNCHRONOUSLY.
        result: output_shape according to ospec
            The output of the combinatorial logic.  it is updated
            COMBINATORIALLY (no clock dependence).

        Truth Table

        Inputs  Temp  Output  Data
        -------   -     -----   ----
        P P N N ~NiR&  N P
        i o i o  NoV   o o
        V R R V        V R

        -------   -    - -
        0 0 0 0   0    0 1    reg
        0 0 0 1   1    1 0    reg
        0 0 1 0   0    0 1    reg
        0 0 1 1   0    0 1    reg
        -------   -    - -
        0 1 0 0   0    0 1    reg
        0 1 0 1   1    1 0    reg
        0 1 1 0   0    0 1    reg
        0 1 1 1   0    0 1    reg
        -------   -    - -
        1 0 0 0   0    1 1    reg
        1 0 0 1   1    1 0    reg
        1 0 1 0   0    1 1    reg
        1 0 1 1   0    1 1    reg
        -------   -    - -
        1 1 0 0   0    1 1    process(data_i)
        1 1 0 1   1    1 0    process(data_i)
        1 1 1 0   0    1 1    process(data_i)
        1 1 1 1   0    1 1    process(data_i)
        -------   -    - -

        Note: PoR is *NOT* involved in the above decision-making.
    """

    def elaborate(self, platform):
        self.m = m = ControlBase.elaborate(self, platform)

        data_valid = Signal() # is data valid or not
        r_data = _spec(self.stage.ospec, "r_tmp") # output type

        # some temporaries
        p_valid_i = Signal(reset_less=True)
        pv = Signal(reset_less=True)
        buf_full = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)
        # NOTE(review): pv uses the *raw* valid_i/ready_o (not the _test
        # variants used elsewhere) -- confirm this is intentional
        m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o)
        m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid)

        m.d.comb += self.n.valid_o.eq(data_valid)
        m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test)
        m.d.sync += data_valid.eq(p_valid_i | buf_full)

        # capture the processed input only when both sides agree (pv)
        with m.If(pv):
            m.d.sync += nmoperator.eq(r_data, self.data_r)
        data_o = self._postprocess(r_data) # XXX TBD, does nothing right now
        m.d.comb += nmoperator.eq(self.n.data_o, data_o)

        return self.m
+
class UnbufferedPipeline2(ControlBase):
    """ A simple pipeline stage with single-clock synchronisation
        and two-way valid/ready synchronised signalling.

        Note that a stall in one stage will result in the entire pipeline
        chain stalling.

        Also that unlike BufferedHandshake, the valid/ready signalling does NOT
        travel synchronously with the data: the valid/ready signalling
        combines in a *combinatorial* fashion.  Therefore, a long pipeline
        chain will lengthen propagation delays.

        Argument: stage.  see Stage API, above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |    |
                              +- process-> buf <-+
        Attributes:
        -----------
        p.data_i : StageInput, shaped according to ispec
            The pipeline input
        p.data_o : StageOutput, shaped according to ospec
            The pipeline output
        buf : output_shape according to ospec
            A temporary (buffered) copy of a valid output
            This is HELD if the output is not ready.  It is updated
            SYNCHRONOUSLY.

        Inputs  Temp  Output Data
        -------   -     -----
        P P N N ~NiR&  N P    (buf_full)
        i o i o  NoV   o o
        V R R V        V R

        -------   -    - -
        0 0 0 0   0    0 1    process(data_i)
        0 0 0 1   1    1 0    reg (odata, unchanged)
        0 0 1 0   0    0 1    process(data_i)
        0 0 1 1   0    0 1    process(data_i)
        -------   -    - -
        0 1 0 0   0    0 1    process(data_i)
        0 1 0 1   1    1 0    reg (odata, unchanged)
        0 1 1 0   0    0 1    process(data_i)
        0 1 1 1   0    0 1    process(data_i)
        -------   -    - -
        1 0 0 0   0    1 1    process(data_i)
        1 0 0 1   1    1 0    reg (odata, unchanged)
        1 0 1 0   0    1 1    process(data_i)
        1 0 1 1   0    1 1    process(data_i)
        -------   -    - -
        1 1 0 0   0    1 1    process(data_i)
        1 1 0 1   1    1 0    reg (odata, unchanged)
        1 1 1 0   0    1 1    process(data_i)
        1 1 1 1   0    1 1    process(data_i)
        -------   -    - -

        Note: PoR is *NOT* involved in the above decision-making.
    """

    def elaborate(self, platform):
        self.m = m = ControlBase.elaborate(self, platform)

        buf_full = Signal() # is data valid or not
        buf = _spec(self.stage.ospec, "r_tmp") # output type

        # some temporaries
        p_valid_i = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)

        # output valid when either buffered data or fresh input available;
        # input accepted only while the buffer is empty
        m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i)
        m.d.comb += self.p._ready_o.eq(~buf_full)
        m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o)

        # buffered copy takes priority over the freshly-processed input;
        # buf itself snapshots whatever went out on data_o each cycle
        data_o = Mux(buf_full, buf, self.data_r)
        data_o = self._postprocess(data_o) # XXX TBD, does nothing right now
        m.d.comb += nmoperator.eq(self.n.data_o, data_o)
        m.d.sync += nmoperator.eq(buf, self.n.data_o)

        return self.m
+
+
class PassThroughHandshake(ControlBase):
    """ A control block that delays by one clock cycle.

        Inputs   Temporary          Output Data
        -------  ------------------  ----- ----
        P P N N  PiV& PiV| NiR| pvr   N P  (pvr)
        i o i o  PoR  ~PoR ~NoV       o o
        V R R V                       V R

        -------   -    -    -   -     - -
        0 0 0 0   0    1    1   0     1 1   odata (unchanged)
        0 0 0 1   0    1    0   0     1 0   odata (unchanged)
        0 0 1 0   0    1    1   0     1 1   odata (unchanged)
        0 0 1 1   0    1    1   0     1 1   odata (unchanged)
        -------   -    -    -   -     - -
        0 1 0 0   0    0    1   0     0 1   odata (unchanged)
        0 1 0 1   0    0    0   0     0 0   odata (unchanged)
        0 1 1 0   0    0    1   0     0 1   odata (unchanged)
        0 1 1 1   0    0    1   0     0 1   odata (unchanged)
        -------   -    -    -   -     - -
        1 0 0 0   0    1    1   1     1 1   process(in)
        1 0 0 1   0    1    0   0     1 0   odata (unchanged)
        1 0 1 0   0    1    1   1     1 1   process(in)
        1 0 1 1   0    1    1   1     1 1   process(in)
        -------   -    -    -   -     - -
        1 1 0 0   1    1    1   1     1 1   process(in)
        1 1 0 1   1    1    0   0     1 0   odata (unchanged)
        1 1 1 0   1    1    1   1     1 1   process(in)
        1 1 1 1   1    1    1   1     1 1   process(in)
        -------   -    -    -   -     - -

    """

    def elaborate(self, platform):
        self.m = m = ControlBase.elaborate(self, platform)

        r_data = _spec(self.stage.ospec, "r_tmp") # output type

        # temporaries: pvr = "previous valid AND previous ready"
        p_valid_i = Signal(reset_less=True)
        pvr = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)
        m.d.comb += pvr.eq(p_valid_i & self.p.ready_o)

        # ready when the output register is free (or being drained)
        m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test)
        m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o)

        # register newly-processed input on pvr, otherwise hold prior value
        odata = Mux(pvr, self.data_r, r_data)
        m.d.sync += nmoperator.eq(r_data, odata)
        r_data = self._postprocess(r_data) # XXX TBD, does nothing right now
        m.d.comb += nmoperator.eq(self.n.data_o, r_data)

        return m
+
+
class RegisterPipeline(UnbufferedPipeline):
    """ A pipeline stage that delays by one clock cycle: wrapping a
        PassThroughStage in an UnbufferedPipeline creates, as an
        indirect byproduct, a sync'd latch out of data_o and valid_o.
    """
    def __init__(self, iospecfn):
        # a pass-through stage makes the given spec Stage-compliant
        super().__init__(PassThroughStage(iospecfn))
+
+
class FIFOControl(ControlBase):
    """ FIFO Control.  Uses Queue to store data, coincidentally
        happens to have same valid/ready signalling as Stage API.

        data_i -> fifo.din -> FIFO -> fifo.dout -> data_o
    """
    def __init__(self, depth, stage, in_multi=None, stage_ctl=False,
                 fwft=True, pipe=False):
        """ FIFO Control

            * :depth: number of entries in the FIFO
            * :stage: data processing block
            * :fwft:  first word fall-thru mode (non-fwft introduces delay)
            * :pipe:  specifies pipe mode.

            when fwft = True it indicates that transfers may occur
            combinatorially through stage processing in the same clock cycle.
            This requires that the Stage be a Moore FSM:
            https://en.wikipedia.org/wiki/Moore_machine

            when fwft = False it indicates that all output signals are
            produced only from internal registers or memory, i.e. that the
            Stage is a Mealy FSM:
            https://en.wikipedia.org/wiki/Mealy_machine

            data is processed (and located) as follows:

            self.p  self.stage  temp    fn  temp  fn  temp  fp   self.n
            data_i->process()->result->cat->din.FIFO.dout->cat(data_o)

            yes, really: cat produces a Cat() which can be assigned to.
            this is how the FIFO gets de-catted without needing a de-cat
            function
        """
        self.fwft = fwft
        self.pipe = pipe
        self.fdepth = depth
        ControlBase.__init__(self, stage, in_multi, stage_ctl)

    def elaborate(self, platform):
        self.m = m = ControlBase.elaborate(self, platform)

        # make a FIFO with a signal of equal width to the data_o.
        # NOTE(review): "Queue" here must be the project-local Queue class
        # (the (width, depth, fwft=, pipe=) constructor and the we/writable/
        # din/readable/re/dout members used below do not exist on the python
        # stdlib queue.Queue) -- confirm "from queue import Queue" resolves
        # to the local module and not the stdlib.
        (fwidth, _) = nmoperator.shape(self.n.data_o)
        fifo = Queue(fwidth, self.fdepth, fwft=self.fwft, pipe=self.pipe)
        m.submodules.fifo = fifo

        def processfn(data_i):
            # store result of processing in combinatorial temporary
            result = _spec(self.stage.ospec, "r_temp")
            m.d.comb += nmoperator.eq(result, self.process(data_i))
            return nmoperator.cat(result)

        # prev: make the FIFO (Queue object) "look" like a PrevControl...
        m.submodules.fp = fp = PrevControl()
        fp.valid_i, fp._ready_o, fp.data_i = fifo.we, fifo.writable, fifo.din
        m.d.comb += fp._connect_in(self.p, fn=processfn)

        # next: make the FIFO (Queue object) "look" like a NextControl...
        m.submodules.fn = fn = NextControl()
        fn.valid_o, fn.ready_i, fn.data_o = fifo.readable, fifo.re, fifo.dout
        connections = fn._connect_out(self.n, fn=nmoperator.cat)

        # ok ok so we can't just do the ready/valid eqs straight:
        # first 2 from connections are the ready/valid, 3rd is data.
        if self.fwft:
            m.d.comb += connections[:2] # combinatorial on next ready/valid
        else:
            m.d.sync += connections[:2] # non-fwft mode needs sync
        data_o = connections[2] # get the data
        data_o = self._postprocess(data_o) # XXX TBD, does nothing right now
        m.d.comb += data_o

        return m
+
+
# aka "RegStage": a single-entry first-word-fall-through FIFO.
# NOTE: deliberately shadows the earlier ControlBase-based UnbufferedPipeline.
class UnbufferedPipeline(FIFOControl):
    """ FIFO-based drop-in equivalent of UnbufferedPipeline (depth 1, fwft) """
    def __init__(self, stage, in_multi=None, stage_ctl=False):
        super().__init__(1, stage, in_multi, stage_ctl,
                         fwft=True, pipe=False)
+
# aka "BreakReadyStage" XXX had to set fwft=True to get it to work
# NOTE: deliberately shadows the earlier ControlBase-based PassThroughHandshake.
class PassThroughHandshake(FIFOControl):
    """ FIFO-based drop-in equivalent of PassThroughHandshake (depth 1, pipe) """
    def __init__(self, stage, in_multi=None, stage_ctl=False):
        super().__init__(1, stage, in_multi, stage_ctl,
                         fwft=True, pipe=True)
+
# this is *probably* BufferedHandshake, although test #997 now succeeds.
# NOTE: deliberately shadows the earlier ControlBase-based BufferedHandshake.
class BufferedHandshake(FIFOControl):
    """ FIFO-based drop-in equivalent of BufferedHandshake (depth 2, fwft) """
    def __init__(self, stage, in_multi=None, stage_ctl=False):
        super().__init__(2, stage, in_multi, stage_ctl,
                         fwft=True, pipe=False)
+
+
+"""
+# this is *probably* SimpleHandshake (note: memory cell size=0)
+class SimpleHandshake(FIFOControl):
+ def __init__(self, stage, in_multi=None, stage_ctl=False):
+ FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl,
+ fwft=True, pipe=False)
+"""
--- /dev/null
+""" Stage API
+
+ Associated development bugs:
+ * http://bugs.libre-riscv.org/show_bug.cgi?id=64
+ * http://bugs.libre-riscv.org/show_bug.cgi?id=57
+
+ Stage API:
+ ---------
+
+ stage requires compliance with a strict API that may be
+ implemented in several means, including as a static class.
+
+ Stages do not HOLD data, and they definitely do not contain
+ signalling (ready/valid). They do however specify the FORMAT
+ of the incoming and outgoing data, and they provide a means to
+ PROCESS that data (from incoming format to outgoing format).
+
+ Stage Blocks really should be combinatorial blocks (Moore FSMs).
+ It would be ok to have input come in from sync'd sources
+ (clock-driven, Mealy FSMs) however by doing so they would no longer
+ be deterministic, and chaining such blocks with such side-effects
    together could result in unexpected, unpredictable, unreproducible
+ behaviour.
+
    So this is generally to be avoided -- unless you know what you are doing.
+ https://en.wikipedia.org/wiki/Moore_machine
+ https://en.wikipedia.org/wiki/Mealy_machine
+
+ the methods of a stage instance must be as follows:
+
+ * ispec() - Input data format specification. Takes a bit of explaining.
+ The requirements are: something that eventually derives from
+ nmigen Value must be returned *OR* an iterator or iterable
+ or sequence (list, tuple etc.) or generator must *yield*
+ thing(s) that (eventually) derive from the nmigen Value class.
+
+ Complex to state, very simple in practice:
+ see test_buf_pipe.py for over 25 worked examples.
+
+ * ospec() - Output data format specification.
+ format requirements identical to ispec.
+
    * process(i) - Optional function for processing ispec-formatted data.
+ returns a combinatorial block of a result that
+ may be assigned to the output, by way of the "nmoperator.eq"
+ function. Note that what is returned here can be
+ extremely flexible. Even a dictionary can be returned
+ as long as it has fields that match precisely with the
                 Record into which its values are intended to be assigned.
+ Again: see example unit tests for details.
+
+ * setup(m, i) - Optional function for setting up submodules.
+ may be used for more complex stages, to link
+ the input (i) to submodules. must take responsibility
+ for adding those submodules to the module (m).
+ the submodules must be combinatorial blocks and
+ must have their inputs and output linked combinatorially.
+
+ Both StageCls (for use with non-static classes) and Stage (for use
+ by static classes) are abstract classes from which, for convenience
+ and as a courtesy to other developers, anything conforming to the
+ Stage API may *choose* to derive. See Liskov Substitution Principle:
+ https://en.wikipedia.org/wiki/Liskov_substitution_principle
+
+ StageChain:
+ ----------
+
+ A useful combinatorial wrapper around stages that chains them together
+ and then presents a Stage-API-conformant interface. By presenting
+ the same API as the stages it wraps, it can clearly be used recursively.
+
+ StageHelper:
+ ----------
+
+ A convenience wrapper around a Stage-API-compliant "thing" which
+ complies with the Stage API and provides mandatory versions of
+ all the optional bits.
+"""
+
+from abc import ABCMeta, abstractmethod
+import inspect
+
+import nmoperator
+
+
+def _spec(fn, name=None):
+ """ useful function that determines if "fn" has an argument "name".
+ if so, fn(name) is called otherwise fn() is called.
+
+ means that ispec and ospec can be declared with *or without*
+ a name argument. normally it would be necessary to have
+ "ispec(name=None)" to achieve the same effect.
+ """
+ if name is None:
+ return fn()
+ varnames = dict(inspect.getmembers(fn.__code__))['co_varnames']
+ if 'name' in varnames:
+ return fn(name=name)
+ return fn()
+
+
class StageCls(metaclass=ABCMeta):
    """ Class-based "Stage" API.  requires instantiation (after derivation)

        see "Stage API" above.  Note: python does *not* require derivation
        from this class.  All that is required is that the pipelines *have*
        the functions listed in this class.  Derivation from this class
        is therefore merely a "courtesy" to maintainers.
    """
    @abstractmethod
    def ispec(self):
        """ input data format specification (REQUIRED) """

    @abstractmethod
    def ospec(self):
        """ output data format specification (REQUIRED) """

    # OPTIONAL members (not enforced as abstract):
    #   def setup(self, m, i): ...
    #   def process(self, i): ...
+
+
class Stage(metaclass=ABCMeta):
    """ Static "Stage" API.  does not require instantiation (after derivation)

        see "Stage API" above.  Note: python does *not* require derivation
        from this class.  All that is required is that the pipelines *have*
        the functions listed in this class.  Derivation from this class
        is therefore merely a "courtesy" to maintainers.
    """
    @staticmethod
    @abstractmethod
    def ispec():
        """ input data format specification (REQUIRED) """

    @staticmethod
    @abstractmethod
    def ospec():
        """ output data format specification (REQUIRED) """

    # OPTIONAL static members (not enforced as abstract):
    #   def setup(m, i): ...
    #   def process(i): ...
+
+
class StageHelper(Stage):
    """ a convenience wrapper around something that is Stage-API-compliant.
        (that "something" may be a static class, for example).

        StageHelper happens to also be compliant with the Stage API;
        it differs from the stage that it wraps in that all the "optional"
        functions are provided (hence the designation "convenience wrapper")
    """
    def __init__(self, stage):
        # the wrapped Stage-API object.  may be None: derivatives such as
        # StageChain fill the spec functions in afterwards (set_specs)
        self.stage = stage
        self._ispecfn = None
        self._ospecfn = None
        if stage is not None:
            self.set_specs(self, self)

    def ospec(self, name):
        """ output spec, with the name passed through to the ospec function """
        assert self._ospecfn is not None
        return _spec(self._ospecfn, name)

    def ispec(self, name):
        """ input spec, with the name passed through to the ispec function """
        assert self._ispecfn is not None
        return _spec(self._ispecfn, name)

    def set_specs(self, p, n):
        """ sets up the ispecfn and ospecfn for getting input and output data
        """
        src = getattr(p, "stage", p)   # unwrap nested helpers, if any
        dst = getattr(n, "stage", n)
        self._ispecfn = src.ispec
        self._ospecfn = dst.ospec

    def new_specs(self, name):
        """ allocates a new (ispec, ospec) pair, named "<name>_i"/"<name>_o"
        """
        ispec = _spec(self.ispec, "%s_i" % name)
        ospec = _spec(self.ospec, "%s_o" % name)
        return (ispec, ospec)

    def process(self, i):
        """ runs the wrapped stage's process function if present,
            otherwise passes the data through unmodified
        """
        if not (self.stage and hasattr(self.stage, "process")):
            return i
        return self.stage.process(i)

    def setup(self, m, i):
        """ runs the wrapped stage's setup function if present (no-op if not)
        """
        if self.stage is None or not hasattr(self.stage, "setup"):
            return
        self.stage.setup(m, i)

    def _postprocess(self, i): # XXX DISABLED
        return i # RETURNS INPUT
        if hasattr(self.stage, "postprocess"):
            return self.stage.postprocess(i)
        return i
+
+
class StageChain(StageHelper):
    """ pass in a list of stages, and they will automatically be
        chained together via their input and output specs into a
        combinatorial chain, to create one giant combinatorial block.

        the end result basically conforms to the exact same Stage API.

        * input to this class will be the input of the first stage
        * output of first stage goes into input of second
        * output of second goes into input into third
        * ... (etc. etc.)
        * the output of this class will be the output of the last stage

        NOTE: whilst this is very similar to ControlBase.connect(), it is
        *really* important to appreciate that StageChain is pure
        combinatorial and bypasses (does not involve, at all, ready/valid
        signalling of any kind).

        ControlBase.connect on the other hand respects, connects, and uses
        ready/valid signalling.

        Arguments:

        * :chain: a chain of combinatorial blocks conforming to the Stage API
                  NOTE: StageChain.ispec and ospec have to have something
                  to return (beginning and end specs of the chain),
                  therefore the chain argument must be non-zero length

        * :specallocate: if set, new input and output data will be allocated
                         and connected (eq'd) to each chained Stage.
                         in some cases if this is not done, the nmigen warning
                         "driving from two sources, module is being flattened"
                         will be issued.

        NOTE: do NOT use StageChain with combinatorial blocks that have
        side-effects (state-based / clock-based input) or conditional
        (inter-chain) dependencies, unless you really know what you are doing.
    """
    def __init__(self, chain, specallocate=False):
        assert len(chain) > 0, "stage chain must be non-zero length"
        self.chain = chain
        StageHelper.__init__(self, None)
        # pick the setup variant up-front: spec-allocating or direct
        self.setup = self._sa_setup if specallocate else self._na_setup
        # chain ispec comes from the first stage, ospec from the last
        self.set_specs(self.chain[0], self.chain[-1])

    def _sa_setup(self, m, i):
        # spec-allocating variant: a fresh ospec is allocated for every
        # link in the chain and eq'd combinatorially to the next input
        for (idx, c) in enumerate(self.chain):
            if hasattr(c, "setup"):
                c.setup(m, i) # stage may have some module stuff
            ofn = self.chain[idx].ospec # last assignment survives
            o = _spec(ofn, 'chainin%d' % idx)
            m.d.comb += nmoperator.eq(o, c.process(i)) # process input into "o"
            if idx == len(self.chain)-1:
                break
            ifn = self.chain[idx+1].ispec # new input on next loop
            i = _spec(ifn, 'chainin%d' % (idx+1))
            m.d.comb += nmoperator.eq(i, o) # assign to next input
        self.o = o
        return self.o # last loop is the output

    def _na_setup(self, m, i):
        # non-allocating variant: each stage's process() output is fed
        # directly (same expression) into the next stage's input
        for (idx, c) in enumerate(self.chain):
            if hasattr(c, "setup"):
                c.setup(m, i) # stage may have some module stuff
            i = o = c.process(i) # store input into "o"
        self.o = o
        return self.o # last loop is the output

    def process(self, i):
        # NOTE: relies on setup() having been called first (it sets self.o);
        # the "i" argument is deliberately ignored here
        return self.o # conform to Stage API: return last-loop output
+
+
--- /dev/null
+from operator import add
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_add_experiment import FPADD
+
+from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_rs_case, check_rs_case, run_test,
+ run_edge_cases, run_corner_cases)
+
def testbench(dut):
    """ drives the FPADD dut through a set of known-answer cases
        (check_rs_case args are raw IEEE754 single-precision bit-patterns:
        input a, input b, expected sum), then through randomised
        regression, corner-case and edge-case sweeps.
    """
    yield from check_rs_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1)
    yield from check_rs_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002)
    yield from check_rs_case(dut, 0x00000047, 0x80000048, 0x80000001)
    yield from check_rs_case(dut, 0x000116C2, 0x8001170A, 0x80000048)
    yield from check_rs_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33)
    yield from check_rs_case(dut, 0, 0, 0)
    # NaN / opposite-sign-infinity inputs: expected result is quiet NaN
    yield from check_rs_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000)
    yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
    #yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
    yield from check_rs_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000)
    # exact cancellation: x + (-x) = +0.0
    yield from check_rs_case(dut, 0x42540000, 0xC2540000, 0x00000000)
    yield from check_rs_case(dut, 0xC2540000, 0x42540000, 0x00000000)
    yield from check_rs_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000)
    yield from check_rs_case(dut, 0x82471f51, 0x243985f, 0x801c3790)
    yield from check_rs_case(dut, 0x40000000, 0xc0000000, 0x00000000)
    yield from check_rs_case(dut, 0x3F800000, 0x40000000, 0x40400000)
    yield from check_rs_case(dut, 0x40000000, 0x3F800000, 0x40400000)
    yield from check_rs_case(dut, 0x447A0000, 0x4488B000, 0x4502D800)
    # operand-order symmetry checks (same pair both ways round)
    yield from check_rs_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E)
    yield from check_rs_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E)
    yield from check_rs_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6)
    yield from check_rs_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6)
    yield from check_rs_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6)
    yield from check_rs_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6)
    # infinity propagation
    yield from check_rs_case(dut, 0x7F800000, 0x00000000, 0x7F800000)
    yield from check_rs_case(dut, 0x00000000, 0x7F800000, 0x7F800000)
    yield from check_rs_case(dut, 0xFF800000, 0x00000000, 0xFF800000)
    yield from check_rs_case(dut, 0x00000000, 0xFF800000, 0xFF800000)
    yield from check_rs_case(dut, 0x7F800000, 0x7F800000, 0x7F800000)
    yield from check_rs_case(dut, 0xFF800000, 0xFF800000, 0xFF800000)
    yield from check_rs_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
    # subnormal and rounding boundary cases
    yield from check_rs_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7)
    yield from check_rs_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E)
    yield from check_rs_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE)
    yield from check_rs_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE)
    yield from check_rs_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE)
    yield from check_rs_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD)
    yield from check_rs_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF)
    yield from check_rs_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358)
    yield from check_rs_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358)
    yield from check_rs_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5)
    yield from check_rs_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5)
    #yield from check_rs_case(dut, 1, 0, 1)
    #yield from check_rs_case(dut, 1, 1, 1)

    count = 0

    #regression tests
    stimulus_a = [0x80000000, 0x22cb525a, 0x40000000, 0x83e73d5c,
                  0xbf9b1e94, 0x34082401,
                  0x5e8ef81, 0x5c75da81, 0x2b017]
    stimulus_b = [0xff800001, 0xadd79efa, 0xC0000000, 0x1c800000,
                  0xc038ed3a, 0xb328cd45,
                  0x114f3db, 0x2f642a39, 0xff3807ab]
    yield from run_test(dut, stimulus_a, stimulus_b, add, get_rs_case)
    count += len(stimulus_a)
    print (count, "vectors passed")

    # randomised sweeps from the shared unit-test helpers
    yield from run_corner_cases(dut, count, add, get_rs_case)
    yield from run_edge_cases(dut, count, add, get_rs_case)
+
if __name__ == '__main__':
    # single-precision pipelined FP adder with 5-bit muxid, dumping a VCD
    # trace for waveform inspection on failure
    dut = FPADD(width=32, id_wid=5, single_cycle=True)
    run_simulation(dut, testbench(dut), vcd_name="test_add.vcd")
+
--- /dev/null
+from operator import add
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_add_experiment import FPADD
+
+from unit_test_half import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_case, check_case, run_test,
+ run_edge_cases, run_corner_cases)
+
def testbench(dut):
    """ Exercise the 16-bit (half-precision) FP adder with a handful of
        hand-picked cases, a tiny regression set, then the shared
        corner/edge-case suites.

        NOTE(review): the large block of disabled cases appears to relate
        to NaN propagation behaviour -- left commented out as found.
    """
    #yield from check_case(dut, 0x7800, 0xff6f, 0xff6f)
    #yield from check_case(dut, 0x0000, 0x7c32, 0x7e32)
    #yield from check_case(dut, 0x0000, 0x7da9, 0x7fa9)
    #yield from check_case(dut, 0x0000, 0x7ea0, 0x7ea0)
    #yield from check_case(dut, 0x7c9a, 0x8000, 0x7e9a)
    #yield from check_case(dut, 0x7d5e, 0x0000, 0x7f5e)
    #yield from check_case(dut, 0x8000, 0x7c8c, 0x7e8c)
    #yield from check_case(dut, 0x8000, 0xfc55, 0xfe55)
    #yield from check_case(dut, 0x8000, 0x7e1a, 0x7e1a)

    #yield from check_case(dut, 0x8000, 0xfc01, 0x7e00)
    yield from check_case(dut, 0xfc00, 0x7c00, 0x7e00)  # -inf + inf = NaN
    yield from check_case(dut, 0x8000, 0, 0)            # -0 + 0 = 0
    yield from check_case(dut, 0, 0, 0)

    count = 0

    #regression tests
    stimulus_a = [ 0x8000, 0x8000 ]
    stimulus_b = [ 0x0000, 0xfc01 ]
    yield from run_test(dut, stimulus_a, stimulus_b, add)
    count += len(stimulus_a)
    print (count, "vectors passed")

    yield from run_corner_cases(dut, count, add)
    yield from run_edge_cases(dut, count, add)
+
if __name__ == '__main__':
    # half-precision pipelined FP adder (no muxid), with VCD trace
    dut = FPADD(width=16, single_cycle=True)
    run_simulation(dut, testbench(dut), vcd_name="test_add16.vcd")
+
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+from operator import add
+
+from nmigen_add_experiment import FPADD
+
+import sys
+import atexit
+from random import randint
+from random import seed
+
+from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_case, check_case, run_test,
+ run_edge_cases, run_corner_cases)
+
+
def testbench(dut):
    """ Exercise the 64-bit (double-precision) FP adder: a few known-answer
        cases, a two-vector regression set, then the shared
        corner/edge-case suites.
    """
    yield from check_case(dut, 0, 0, 0)
    # 1.0 + 2.0 == 3.0 (both operand orders)
    yield from check_case(dut, 0x3FF0000000000000, 0x4000000000000000,
                               0x4008000000000000)
    yield from check_case(dut, 0x4000000000000000, 0x3FF0000000000000,
                               0x4008000000000000)
    yield from check_case(dut, 0x4056C00000000000, 0x4042800000000000,
                               0x4060000000000000)
    yield from check_case(dut, 0x4056C00000000000, 0x4042EA3D70A3D70A,
                               0x40601A8F5C28F5C2)

    count = 0

    #regression tests
    stimulus_a = [0x3ff00000000000c5, 0xff80000000000000]
    stimulus_b = [0xbd28a404211fb72b, 0x7f80000000000000]
    yield from run_test(dut, stimulus_a, stimulus_b, add)
    count += len(stimulus_a)
    print (count, "vectors passed")

    yield from run_corner_cases(dut, count, add)
    yield from run_edge_cases(dut, count, add)
+
+
if __name__ == '__main__':
    # double-precision multi-cycle FP adder, with VCD trace
    dut = FPADD(width=64, single_cycle=False)
    run_simulation(dut, testbench(dut), vcd_name="test_add64.vcd")
+
--- /dev/null
+from random import randint
+from operator import add
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_add_experiment import FPADDBase, FPADDBaseMod
+
def get_case(dut, a, b, mid):
    """ Push one (a, b) operand pair with muxid "mid" into FPADDBaseMod
        and wait for the corresponding (result, muxid) to emerge.

        Protocol (cycle-accurate, so the statement order matters):
        set inputs and strobe, wait four cycles, then expect the input
        "ack" to have dropped again before collecting the output.
    """
    yield dut.in_mid.eq(mid)
    yield dut.in_a.eq(a)
    yield dut.in_b.eq(b)
    yield dut.in_t.stb.eq(1)
    yield
    yield
    yield
    yield
    # NOTE(review): ack is expected to be *deasserted* again here,
    # i.e. the transfer was accepted earlier in the 4-cycle window
    ack = (yield dut.in_t.ack)
    assert ack == 0

    yield dut.in_t.stb.eq(0)

    yield dut.out_z.ack.eq(1)

    # busy-wait for the output strobe, then sample result and muxid
    while True:
        out_z_stb = (yield dut.out_z.stb)
        if not out_z_stb:
            yield
            continue
        out_z = yield dut.out_z.v
        out_mid = yield dut.out_mid
        yield dut.out_z.ack.eq(0)
        yield
        break

    return out_z, out_mid
+
def check_case(dut, a, b, z, mid=None):
    """ run a single addition through get_case and verify that both the
        result word and the muxid come back intact.  when the caller
        does not supply a muxid, a random one (0..6) is used.
    """
    if mid is None:
        mid = randint(0, 6)
    res, res_mid = yield from get_case(dut, a, b, mid)
    assert res == z, "Output z 0x%x not equal to expected 0x%x" % (res, z)
    assert res_mid == mid, "Output mid 0x%x != expected 0x%x" % (res_mid, mid)
+
+
+
def testbench(dut):
    """ Drive FPADDBaseMod directly (no reservation stations) through the
        same known-answer single-precision vectors used by the pipelined
        test: specials, sign-cancellation, denormals, rounding boundaries.
    """
    yield from check_case(dut, 0x36093399, 0x7f6a12f1, 0x7f6a12f1)
    yield from check_case(dut, 0x006CE3EE, 0x806CE3EC, 0x00000002)
    yield from check_case(dut, 0x00000047, 0x80000048, 0x80000001)
    yield from check_case(dut, 0x000116C2, 0x8001170A, 0x80000048)
    yield from check_case(dut, 0x7ed01f25, 0xff559e2c, 0xfedb1d33)
    yield from check_case(dut, 0, 0, 0)
    # NaN / infinity handling
    yield from check_case(dut, 0xFFFFFFFF, 0xC63B800A, 0x7FC00000)
    yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
    #yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
    yield from check_case(dut, 0x7F800000, 0xFF800000, 0x7FC00000)
    # exact sign-cancellation (x + -x == +0)
    yield from check_case(dut, 0x42540000, 0xC2540000, 0x00000000)
    yield from check_case(dut, 0xC2540000, 0x42540000, 0x00000000)
    yield from check_case(dut, 0xfe34f995, 0xff5d59ad, 0xff800000)
    yield from check_case(dut, 0x82471f51, 0x243985f, 0x801c3790)
    yield from check_case(dut, 0x40000000, 0xc0000000, 0x00000000)
    yield from check_case(dut, 0x3F800000, 0x40000000, 0x40400000)
    yield from check_case(dut, 0x40000000, 0x3F800000, 0x40400000)
    yield from check_case(dut, 0x447A0000, 0x4488B000, 0x4502D800)
    yield from check_case(dut, 0x463B800A, 0x42BA8A3D, 0x463CF51E)
    yield from check_case(dut, 0x42BA8A3D, 0x463B800A, 0x463CF51E)
    yield from check_case(dut, 0x463B800A, 0xC2BA8A3D, 0x463A0AF6)
    yield from check_case(dut, 0xC2BA8A3D, 0x463B800A, 0x463A0AF6)
    yield from check_case(dut, 0xC63B800A, 0x42BA8A3D, 0xC63A0AF6)
    yield from check_case(dut, 0x42BA8A3D, 0xC63B800A, 0xC63A0AF6)
    # infinity plus finite == infinity (both signs, both operand orders)
    yield from check_case(dut, 0x7F800000, 0x00000000, 0x7F800000)
    yield from check_case(dut, 0x00000000, 0x7F800000, 0x7F800000)
    yield from check_case(dut, 0xFF800000, 0x00000000, 0xFF800000)
    yield from check_case(dut, 0x00000000, 0xFF800000, 0xFF800000)
    yield from check_case(dut, 0x7F800000, 0x7F800000, 0x7F800000)
    yield from check_case(dut, 0xFF800000, 0xFF800000, 0xFF800000)
    yield from check_case(dut, 0xFF800000, 0x7F800000, 0x7FC00000)
    # denormal-range additions
    yield from check_case(dut, 0x00018643, 0x00FA72A4, 0x00FBF8E7)
    yield from check_case(dut, 0x001A2239, 0x00FA72A4, 0x010A4A6E)
    yield from check_case(dut, 0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE)
    yield from check_case(dut, 0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE)
    yield from check_case(dut, 0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE)
    # rounding near the largest finite value
    yield from check_case(dut, 0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD)
    yield from check_case(dut, 0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF)
    yield from check_case(dut, 0x42500000, 0x51A7A358, 0x51A7A358)
    yield from check_case(dut, 0x51A7A358, 0x42500000, 0x51A7A358)
    yield from check_case(dut, 0x4E5693A4, 0x42500000, 0x4E5693A5)
    yield from check_case(dut, 0x42500000, 0x4E5693A4, 0x4E5693A5)
+
if __name__ == '__main__':
    # base (non-reservation-station) single-precision adder, with VCD trace
    dut = FPADDBaseMod(width=32, id_wid=5, single_cycle=True)
    run_simulation(dut, testbench(dut), vcd_name="test_add.vcd")
+
--- /dev/null
+""" Unit tests for Buffered and Unbuffered pipelines
+
+ contains useful worked examples of how to use the Pipeline API,
+ including:
+
+ * Combinatorial Stage "Chaining"
+ * class-based data stages
+ * nmigen module-based data stages
+ * special nmigen module-based data stage, where the stage *is* the module
+ * Record-based data stages
+ * static-class data stages
+ * multi-stage pipelines (and how to connect them)
+ * how to *use* the pipelines (see Test5) - how to get data in and out
+
+"""
+
+from nmigen import Module, Signal, Mux, Const, Elaboratable
+from nmigen.hdl.rec import Record
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd
+from example_buf_pipe import ExamplePipeline, UnbufferedPipeline
+from example_buf_pipe import ExampleStageCls
+from example_buf_pipe import PrevControl, NextControl, BufferedHandshake
+from example_buf_pipe import StageChain, ControlBase, StageCls
+from singlepipe import UnbufferedPipeline2
+from singlepipe import SimpleHandshake
+from singlepipe import PassThroughHandshake
+from singlepipe import PassThroughStage
+from singlepipe import FIFOControl
+from singlepipe import RecordObject
+
+from random import randint, seed
+
+#seed(4)
+
+
def check_o_n_valid(dut, val):
    """ assert that the next-stage valid_o output equals the expected value """
    assert (yield dut.n.valid_o) == val
+
def check_o_n_valid2(dut, val):
    """ identical check to check_o_n_valid; kept as a separate name so
        failures in the two-clock-latency tests are distinguishable """
    valid = yield dut.n.valid_o
    assert valid == val
+
+
def tbench(dut):
    """ Cycle-by-cycle exercise of a single buffered pipe: push values,
        stall the consumer, and check the valid_o timing at each step.
        The inline comments track the expected handshake latency; the
        statement order is cycle-accurate and must not be changed.
    """
    #yield dut.i_p_rst.eq(1)
    yield dut.n.ready_i.eq(0)
    #yield dut.p.ready_o.eq(0)
    yield
    yield
    #yield dut.i_p_rst.eq(0)
    yield dut.n.ready_i.eq(1)
    yield dut.p.data_i.eq(5)
    yield dut.p.valid_i.eq(1)
    yield

    yield dut.p.data_i.eq(7)
    yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed
    yield
    yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt

    yield dut.p.data_i.eq(2)
    yield
    yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready)
    yield dut.p.data_i.eq(9)
    yield
    yield dut.p.valid_i.eq(0)
    yield dut.p.data_i.eq(12)
    yield
    yield dut.p.data_i.eq(32)
    yield dut.n.ready_i.eq(1)
    yield
    yield from check_o_n_valid(dut, 1) # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 1) # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done.
    yield
+
+
def tbench2(dut):
    """ Same stall/drain scenario as tbench, but for a two-stage pipe:
        valid_o is expected two clocks (not one) after valid_i, and the
        drain at the end takes one extra cycle.  Cycle-accurate: do not
        reorder statements.
    """
    #yield dut.p.i_rst.eq(1)
    yield dut.n.ready_i.eq(0)
    #yield dut.p.ready_o.eq(0)
    yield
    yield
    #yield dut.p.i_rst.eq(0)
    yield dut.n.ready_i.eq(1)
    yield dut.p.data_i.eq(5)
    yield dut.p.valid_i.eq(1)
    yield

    yield dut.p.data_i.eq(7)
    yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks
    yield
    yield from check_o_n_valid2(dut, 0) # effects of i_p_valid delayed 2 clocks

    yield dut.p.data_i.eq(2)
    yield
    yield from check_o_n_valid2(dut, 1) # ok *now* i_p_valid effect is felt
    yield dut.n.ready_i.eq(0) # begin going into "stall" (next stage says ready)
    yield dut.p.data_i.eq(9)
    yield
    yield dut.p.valid_i.eq(0)
    yield dut.p.data_i.eq(12)
    yield
    yield dut.p.data_i.eq(32)
    yield dut.n.ready_i.eq(1)
    yield
    yield from check_o_n_valid2(dut, 1) # buffer still needs to output
    yield
    yield from check_o_n_valid2(dut, 1) # buffer still needs to output
    yield
    yield from check_o_n_valid2(dut, 1) # buffer still needs to output
    yield
    yield from check_o_n_valid2(dut, 0) # buffer outputted, *now* we're done.
    yield
    yield
    yield
+
+
class Test3:
    """ Random-traffic driver/checker for a single-signal pipeline.

        send() and rcv() run as two concurrent simulation processes:
        send pushes self.data into the pipe with random pauses, rcv pops
        results with random consumer stalls and checks each one via the
        supplied resultfn(data_o, expected, i, o).

        Relies on the module-level "num_tests" constant (defined earlier
        in this file, outside this view).
    """
    def __init__(self, dut, resultfn):
        self.dut = dut
        self.resultfn = resultfn
        # deterministic ramp 1..num_tests (random data left disabled)
        self.data = []
        for i in range(num_tests):
            #data.append(randint(0, 1<<16-1))
            self.data.append(i+1)
        self.i = 0   # next index to send
        self.o = 0   # next index expected at the output

    def send(self):
        """ producer process: random bursts/pauses, honouring ready_o """
        while self.o != len(self.data):
            send_range = randint(0, 3)
            for j in range(randint(1,10)):
                if send_range == 0:
                    send = True
                else:
                    send = randint(0, send_range) != 0
                o_p_ready = yield self.dut.p.ready_o
                if not o_p_ready:
                    yield
                    continue
                if send and self.i != len(self.data):
                    yield self.dut.p.valid_i.eq(1)
                    yield self.dut.p.data_i.eq(self.data[self.i])
                    self.i += 1
                else:
                    yield self.dut.p.valid_i.eq(0)
                yield

    def rcv(self):
        """ consumer process: random stalls, checks each output in order """
        while self.o != len(self.data):
            stall_range = randint(0, 3)
            for j in range(randint(1,10)):
                stall = randint(0, stall_range) != 0
                yield self.dut.n.ready_i.eq(stall)
                yield
                o_n_valid = yield self.dut.n.valid_o
                i_n_ready = yield self.dut.n.ready_i_test
                # a transfer only happens when valid and ready coincide
                if not o_n_valid or not i_n_ready:
                    continue
                data_o = yield self.dut.n.data_o
                self.resultfn(data_o, self.data[self.o], self.i, self.o)
                self.o += 1
                if self.o == len(self.data):
                    break
+
def resultfn_3(data_o, expected, i, o):
    """ check a single +1 stage: output must be input plus one """
    want = expected + 1
    assert data_o == want, \
        "%d-%d data %x not match %x\n" \
        % (i, o, data_o, expected)
+
def data_placeholder():
    """ build num_tests PlaceHolder objects carrying random src1/src2.

        NOTE(review): "1<<16-1" parses as 1<<15 (32768), not 0xffff --
        probably intended (1<<16)-1; left unchanged to preserve behaviour.
    """
    data = []
    for i in range(num_tests):
        d = PlaceHolder()
        d.src1 = randint(0, 1<<16-1)
        d.src2 = randint(0, 1<<16-1)
        data.append(d)
    return data
+
def data_dict():
    """ build num_tests dicts of two random source operands.

        NOTE(review): "1 << 16 - 1" parses as 1<<15 (32768), not 0xffff;
        kept as-is to preserve the original stimulus range.
    """
    return [{'src1': randint(0, 1 << 16 - 1),
             'src2': randint(0, 1 << 16 - 1)}
            for _ in range(num_tests)]
+
+
class Test5:
    """ Random-traffic driver/checker for multi-signal / Record / object
        pipelines.  Like Test3 but uses dut.set_input() to apply each
        stimulus and can unpack Record outputs into a dict.

        Relies on the module-level "num_tests" constant (defined earlier
        in this file, outside this view).
    """
    def __init__(self, dut, resultfn, data=None, stage_ctl=False):
        self.dut = dut
        self.resultfn = resultfn
        self.stage_ctl = stage_ctl
        if data:
            self.data = data
        else:
            # default stimulus: pairs of random 0..32768 values
            # (1<<16-1 parses as 1<<15; see note on data_dict)
            self.data = []
            for i in range(num_tests):
                self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1)))
        self.i = 0   # next index to send
        self.o = 0   # next index expected at the output

    def send(self):
        """ producer process: random bursts/pauses, honouring ready_o """
        while self.o != len(self.data):
            send_range = randint(0, 3)
            for j in range(randint(1,10)):
                if send_range == 0:
                    send = True
                else:
                    send = randint(0, send_range) != 0
                #send = True
                o_p_ready = yield self.dut.p.ready_o
                if not o_p_ready:
                    yield
                    continue
                if send and self.i != len(self.data):
                    yield self.dut.p.valid_i.eq(1)
                    for v in self.dut.set_input(self.data[self.i]):
                        yield v
                    self.i += 1
                else:
                    yield self.dut.p.valid_i.eq(0)
                yield

    def rcv(self):
        """ consumer process: random stalls; Record outputs are read
            field-by-field into a plain dict before checking """
        while self.o != len(self.data):
            stall_range = randint(0, 3)
            for j in range(randint(1,10)):
                ready = randint(0, stall_range) != 0
                #ready = True
                yield self.dut.n.ready_i.eq(ready)
                yield
                o_n_valid = yield self.dut.n.valid_o
                i_n_ready = yield self.dut.n.ready_i_test
                if not o_n_valid or not i_n_ready:
                    continue
                if isinstance(self.dut.n.data_o, Record):
                    data_o = {}
                    dod = self.dut.n.data_o
                    for k, v in dod.fields.items():
                        data_o[k] = yield v
                else:
                    data_o = yield self.dut.n.data_o
                self.resultfn(data_o, self.data[self.o], self.i, self.o)
                self.o += 1
                if self.o == len(self.data):
                    break
+
def resultfn_5(data_o, expected, i, o):
    """ check a two-operand adder stage: output must be the pair's sum """
    assert data_o == expected[0] + expected[1], \
        "%d-%d data %x not match %s\n" \
        % (i, o, data_o, repr(expected))
+
def tbench4(dut):
    """ Single-process random-traffic test of a two-stage (+2) pipe:
        producer and consumer logic interleaved in one loop, with random
        stalls on both sides.  Relies on module-level "num_tests".
    """
    data = []
    for i in range(num_tests):
        #data.append(randint(0, 1<<16-1))
        data.append(i+1)
    i = 0   # send index
    o = 0   # receive index
    while True:
        stall = randint(0, 3) != 0
        send = randint(0, 5) != 0
        yield dut.n.ready_i.eq(stall)
        o_p_ready = yield dut.p.ready_o
        if o_p_ready:
            if send and i != len(data):
                yield dut.p.valid_i.eq(1)
                yield dut.p.data_i.eq(data[i])
                i += 1
            else:
                yield dut.p.valid_i.eq(0)
        yield
        o_n_valid = yield dut.n.valid_o
        i_n_ready = yield dut.n.ready_i_test
        if o_n_valid and i_n_ready:
            # two chained +1 stages, hence "+ 2"
            data_o = yield dut.n.data_o
            assert data_o == data[o] + 2, "%d-%d data %x not match %x\n" \
                % (i, o, data_o, data[o])
            o += 1
            if o == len(data):
                break
+
+######################################################################
+# Test 2 and 4
+######################################################################
+
class ExampleBufPipe2(ControlBase):
    """ example of chaining two buffered pipeline stages together """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        first = ExampleBufPipe()
        second = ExampleBufPipe()
        m.submodules.pipe1 = first
        m.submodules.pipe2 = second

        m.d.comb += self.connect([first, second])

        return m
+
+
+######################################################################
+# Test 9
+######################################################################
+
class ExampleBufPipeChain2(BufferedHandshake):
    """ two +1 stages fused by StageChain into one *combinatorial* stage
        behind a single buffered handshake (result is input + 2) """

    def __init__(self):
        chain = StageChain([ExampleStageCls(), ExampleStageCls()])
        BufferedHandshake.__init__(self, chain)
+
+
def data_chain2():
    """ random stimulus for the chained (+2) pipeline.
        NOTE(review): "1 << 16 - 2" parses as 1<<14 (16384); kept as-is.
    """
    return [randint(0, 1 << 16 - 2) for _ in range(num_tests)]
+
+
def resultfn_9(data_o, expected, i, o):
    """ check the chained stage: two +1 stages, so output is input + 2 """
    want = expected + 2
    assert data_o == want, \
        "%d-%d received data %x not match expected %x\n" \
        % (i, o, data_o, want)
+
+
+######################################################################
+# Test 6 and 10
+######################################################################
+
class SetLessThan(Elaboratable):
    """ combinatorial "set less than": output is 1 when src1 < src2,
        else 0.  Comparison signedness is set by the constructor.

        NOTE(review): the Module is created in __init__ and reused by
        elaborate(); that is unusual (normally the Module is built inside
        elaborate) but works here because each instance is elaborated once.
    """
    def __init__(self, width, signed):
        self.m = Module()
        self.src1 = Signal((width, signed), name="src1")
        self.src2 = Signal((width, signed), name="src2")
        self.output = Signal(width, name="out")

    def elaborate(self, platform):
        self.m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0))
        return self.m
+
+
class LTStage(StageCls):
    """ module-based stage example wrapping SetLessThan.

        Unlike the other stage examples in this file, ispec/ospec take a
        "name" argument so the generated signals are uniquely named.
    """
    def __init__(self):
        self.slt = SetLessThan(16, True)

    def ispec(self, name):
        """ input: a pair of 16-bit operands to compare """
        return (Signal(16, name="%s_sig1" % name),
                Signal(16, name="%s_sig2" % name))

    def ospec(self, name):
        """ output: 1 if src1 < src2 else 0.

            bugfix: Signal's "name" parameter is keyword-only in nmigen,
            so the previous positional form Signal(16, "%s_out" % name)
            raised TypeError; now matches the keyword form used in ispec.
        """
        return Signal(16, name="%s_out" % name)

    def setup(self, m, i):
        """ attach the SetLessThan submodule and wire its operands """
        self.o = Signal(16)
        m.submodules.slt = self.slt
        m.d.comb += self.slt.src1.eq(i[0])
        m.d.comb += self.slt.src2.eq(i[1])
        m.d.comb += self.o.eq(self.slt.output)

    def process(self, i):
        """ result was already wired combinatorially in setup() """
        return self.o
+
+
class LTStageDerived(SetLessThan, StageCls):
    """ special version of a nmigen module where the module is also a stage

        shows that you don't actually need to combinatorially connect
        to the outputs, or add the module as a submodule: just return
        the module output parameter(s) from the Stage.process() function
    """

    def __init__(self):
        # 16-bit signed comparison, as in LTStage
        SetLessThan.__init__(self, 16, True)

    def ispec(self):
        """ input: a pair of 16-bit operands """
        return (Signal(16), Signal(16))

    def ospec(self):
        """ output: the 16-bit less-than result """
        return Signal(16)

    def setup(self, m, i):
        # the stage *is* the submodule; wire the inputs only
        m.submodules.slt = self
        m.d.comb += self.src1.eq(i[0])
        m.d.comb += self.src2.eq(i[1])

    def process(self, i):
        # return the module's own output signal directly
        return self.output
+
+
class ExampleLTPipeline(UnbufferedPipeline):
    """ unbuffered pipeline wrapping the module-based LTStage example """

    def __init__(self):
        UnbufferedPipeline.__init__(self, LTStage())
+
+
class ExampleLTBufferedPipeDerived(BufferedHandshake):
    """ buffered pipeline wrapping the stage-is-the-module LTStageDerived """

    def __init__(self):
        BufferedHandshake.__init__(self, LTStageDerived())
+
+
def resultfn_6(data_o, expected, i, o):
    """ check less-than output against a python comparison of the pair """
    res = int(expected[0] < expected[1])
    assert data_o == res, \
        "%d-%d data %x not match %s\n" \
        % (i, o, data_o, repr(expected))
+
+
+######################################################################
+# Test 7
+######################################################################
+
class ExampleAddRecordStage(StageCls):
    """ example use of a Record: input and output are both two-field
        records, and the stage increments each field independently
    """

    # shared field layout for both input and output records
    record_spec = [('src1', 16), ('src2', 16)]
    def ispec(self):
        """ returns a Record using the specification
        """
        return Record(self.record_spec)

    def ospec(self):
        """ returns a Record with the same shape as the input """
        return Record(self.record_spec)

    def process(self, i):
        """ process the input data, returning a dictionary with key names
            that exactly match the Record's attributes.
        """
        return {'src1': i.src1 + 1,
                'src2': i.src2 + 1}
+
+######################################################################
+# Test 11
+######################################################################
+
class ExampleAddRecordPlaceHolderStage(StageCls):
    """ example use of a Record, with a placeholder as the processing result
    """

    # shared field layout for both input and output records
    record_spec = [('src1', 16), ('src2', 16)]
    def ispec(self):
        """ returns a Record using the specification
        """
        return Record(self.record_spec)

    def ospec(self):
        """ returns a Record with the same shape as the input """
        return Record(self.record_spec)

    def process(self, i):
        """ process the input data, returning a PlaceHolder class instance
            with attributes that exactly match those of the Record.
        """
        o = PlaceHolder()
        o.src1 = i.src1 + 1
        o.src2 = i.src2 + 1
        return o
+
+
# a dummy attribute-bag class: instances get fields (src1, src2, ...)
# assigned after creation (see ExampleAddRecordPlaceHolderStage.process)
class PlaceHolder: pass
+
+
class ExampleAddRecordPipe(UnbufferedPipeline):
    """ combinatorial pipeline around the Record-based +1/+1 stage """

    def __init__(self):
        UnbufferedPipeline.__init__(self, ExampleAddRecordStage())
+
+
def resultfn_7(data_o, expected, i, o):
    """ both record fields must come back incremented by one """
    res = (expected['src1'] + 1, expected['src2'] + 1)
    assert (data_o['src1'], data_o['src2']) == res, \
        "%d-%d data %s not match %s\n" \
        % (i, o, repr(data_o), repr(expected))
+
+
class ExampleAddRecordPlaceHolderPipe(UnbufferedPipeline):
    """ combinatorial pipeline around the PlaceHolder-returning stage """

    def __init__(self):
        UnbufferedPipeline.__init__(self, ExampleAddRecordPlaceHolderStage())
+
+
def resultfn_test11(data_o, expected, i, o):
    """ expected carries src1/src2 attributes; each output record field
        must be the matching attribute plus one """
    assert data_o['src1'] == expected.src1 + 1 and \
           data_o['src2'] == expected.src2 + 1, \
        "%d-%d data %s not match %s\n" \
        % (i, o, repr(data_o), repr(expected))
+
+
+######################################################################
+# Test 8
+######################################################################
+
+
class Example2OpClass:
    """ an example of a class used to store 2 operands.
        requires an eq function, to conform with the pipeline stage API
    """

    def __init__(self):
        self.op1 = Signal(16)
        self.op2 = Signal(16)

    def eq(self, i):
        """ returns the list of assignments that copies another
            two-operand object (anything with op1/op2) into this one """
        return [self.op1.eq(i.op1), self.op2.eq(i.op2)]
+
+
class ExampleAddClassStage(StageCls):
    """ an example of how to use the buffered pipeline, as a class instance:
        takes an Example2OpClass in and produces the operands' sum
    """

    def ispec(self):
        """ returns an instance of an Example2OpClass.
        """
        return Example2OpClass()

    def ospec(self):
        """ returns an output signal which will happen to contain the sum
            of the two inputs
        """
        return Signal(16, name="add2_out")

    def process(self, i):
        """ process the input data (sums the values in the tuple) and returns it
        """
        return i.op1 + i.op2
+
+
class ExampleBufPipeAddClass(BufferedHandshake):
    """ buffered pipeline around the class-instance adder stage """

    def __init__(self):
        BufferedHandshake.__init__(self, ExampleAddClassStage())
+
+
class TestInputAdd:
    """ carrier object with the exact member layout (op1, op2) of
        Example2OpClass, so that its eq()/set_input can consume it """
    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2
+
+
def resultfn_8(data_o, expected, i, o):
    """ expected is a TestInputAdd instance; output must be op1 + op2 """
    total = expected.op1 + expected.op2
    assert data_o == total, \
        "%d-%d data %s res %x not match %s\n" \
        % (i, o, repr(data_o), total, repr(expected))
+
def data_2op():
    """ build num_tests TestInputAdd objects with random operands.
        NOTE(review): "1<<16-1" parses as 1<<15 (32768); left as-is.
    """
    data = []
    for i in range(num_tests):
        data.append(TestInputAdd(randint(0, 1<<16-1), randint(0, 1<<16-1)))
    return data
+
+
+######################################################################
+# Test 12
+######################################################################
+
class ExampleStageDelayCls(StageCls, Elaboratable):
    """ an example of how to use the buffered pipeline, in a static class
        fashion: a +1 stage that additionally throttles the handshake
        with its own free-running 2-bit counter (so the counter wraps
        modulo 4), via d_ready and d_valid.
    """

    def __init__(self, valid_trigger=2):
        self.count = Signal(2)              # free-running mod-4 counter
        self.valid_trigger = valid_trigger  # count value at which output is valid

    def ispec(self):
        return Signal(16, name="example_input_signal")

    def ospec(self):
        return Signal(16, name="example_output_signal")

    @property
    def d_ready(self):
        """ data is ready to be accepted when this is true
        """
        # NOTE(review): the unreachable "return Const(1)" below looks like
        # a deliberate debug toggle (swap the returns to disable throttling)
        return (self.count == 1)# | (self.count == 3)
        return Const(1)

    def d_valid(self, ready_i):
        """ data is valid at output when this is true
        """
        # same debug-toggle pattern as d_ready: second return is dead code
        return self.count == self.valid_trigger
        return Const(1)

    def process(self, i):
        """ process the input data and returns it (adds 1)
        """
        return i + 1

    def elaborate(self, platform):
        # the stage owns sync logic, so it must be added as a submodule
        # by whichever pipeline wraps it
        m = Module()
        m.d.sync += self.count.eq(self.count + 1)
        return m
+
+
class ExampleBufDelayedPipe(BufferedHandshake):
    """ buffered pipeline around ExampleStageDelayCls, with stage_ctl=True
        so the stage's own d_ready/d_valid throttle the handshake """

    def __init__(self):
        BufferedHandshake.__init__(self,
                                   ExampleStageDelayCls(valid_trigger=2),
                                   stage_ctl=True)

    def elaborate(self, platform):
        # the stage contains sync logic (its counter), so it has to be
        # attached as a submodule here
        m = BufferedHandshake.elaborate(self, platform)
        m.submodules.stage = self.stage
        return m
+
+
def data_chain1():
    """ deterministic walking-one stimulus: a single set bit stepping by
        three positions (mod 15) per entry """
    return [1 << ((i * 3) % 15) for i in range(num_tests)]
+
+
def resultfn_12(data_o, expected, i, o):
    """ check the delayed +1 stage: output must be input plus one """
    want = expected + 1
    assert data_o == want, \
        "%d-%d data %x not match %x\n" \
        % (i, o, data_o, want)
+
+
+######################################################################
+# Test 13
+######################################################################
+
class ExampleUnBufDelayedPipe(BufferedHandshake):
    """ same stage-controlled buffered pipeline as ExampleBufDelayedPipe,
        but with the stage's valid_trigger set to 3 """

    def __init__(self):
        BufferedHandshake.__init__(self,
                                   ExampleStageDelayCls(valid_trigger=3),
                                   stage_ctl=True)

    def elaborate(self, platform):
        # stage has sync logic (its counter): attach it as a submodule
        m = BufferedHandshake.elaborate(self, platform)
        m.submodules.stage = self.stage
        return m
+
+######################################################################
+# Test 15
+######################################################################
+
class ExampleBufModeAdd1Pipe(SimpleHandshake):
    """ simple-handshake pipeline around the basic +1 stage """

    def __init__(self):
        SimpleHandshake.__init__(self, ExampleStageCls())
+
+
+######################################################################
+# Test 16
+######################################################################
+
class ExampleBufModeUnBufPipe(ControlBase):
    """ a simple-handshake +1 stage chained into a buffered +1 stage """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        first = ExampleBufModeAdd1Pipe()
        second = ExampleBufAdd1Pipe()
        m.submodules.pipe1 = first
        m.submodules.pipe2 = second

        m.d.comb += self.connect([first, second])

        return m
+
+######################################################################
+# Test 17
+######################################################################
+
class ExampleUnBufAdd1Pipe2(UnbufferedPipeline2):
    """ variant-2 unbuffered pipeline around the basic +1 stage """

    def __init__(self):
        UnbufferedPipeline2.__init__(self, ExampleStageCls())
+
+
+######################################################################
+# Test 18
+######################################################################
+
class PassThroughTest(PassThroughHandshake):
    """ pass-through handshake test: data goes through unmodified """

    def iospecfn(self):
        """ 16-bit data specification, used for both input and output.

            bugfix: Signal's "name" parameter is keyword-only in nmigen,
            so the previous positional form Signal(16, "out") raised
            TypeError; now matches iospecfn() later in this file.
        """
        return Signal(16, name="out")

    def __init__(self):
        stage = PassThroughStage(self.iospecfn)
        PassThroughHandshake.__init__(self, stage)
+
def resultfn_identical(data_o, expected, i, o):
    """ pass-through check: output must equal the input unchanged """
    assert data_o == expected, \
        "%d-%d data %x not match %x\n" \
        % (i, o, data_o, expected)
+
+
+######################################################################
+# Test 19
+######################################################################
+
class ExamplePassAdd1Pipe(PassThroughHandshake):
    """ pass-through-handshake pipeline around the basic +1 stage """

    def __init__(self):
        PassThroughHandshake.__init__(self, ExampleStageCls())
+
+
class ExampleBufPassThruPipe(ControlBase):
    """ a simple-handshake +1 stage feeding a pass-through-handshake
        +1 stage """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        # XXX this particular permutation currently fails; all others
        # (p1=u,p2=b / p1=u,p2=u / p1=b,p2=b) are fine, and it also
        # fails when UnbufferedPipeline is used instead
        first = ExampleBufModeAdd1Pipe()
        second = ExamplePassAdd1Pipe()
        m.submodules.pipe1 = first
        m.submodules.pipe2 = second

        m.d.comb += self.connect([first, second])

        return m
+
+
+######################################################################
+# Test 20
+######################################################################
+
def iospecfn():
    """ data spec for the FIFO tests: a single 16-bit signal """
    return Signal(16, name="d_in")
+
class FIFOTest16(FIFOControl):
    """ depth-2 FIFO-based handshake around a 16-bit pass-through stage """

    def __init__(self):
        FIFOControl.__init__(self, 2, PassThroughStage(iospecfn))
+
+
+######################################################################
+# Test 21
+######################################################################
+
class ExampleFIFOPassThruPipe1(ControlBase):
    """ two FIFO pass-through stages followed by a +1 stage """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        first = FIFOTest16()
        second = FIFOTest16()
        third = ExamplePassAdd1Pipe()
        m.submodules.pipe1 = first
        m.submodules.pipe2 = second
        m.submodules.pipe3 = third

        m.d.comb += self.connect([first, second, third])

        return m
+
+
+######################################################################
+# Test 22
+######################################################################
+
class Example2OpRecord(RecordObject):
    """ two-operand RecordObject (op1/op2, 16 bits each), used as the
        data type for the record-based FIFO/handshake tests """
    def __init__(self):
        RecordObject.__init__(self)
        self.op1 = Signal(16)
        self.op2 = Signal(16)
+
+
class ExampleAddRecordObjectStage(StageCls):
    """ stage taking an Example2OpRecord in and producing the sum of its
        two operands """

    def ispec(self):
        """ returns an instance of an Example2OpRecord.
        """
        return Example2OpRecord()

    def ospec(self):
        """ returns an output signal which will happen to contain the sum
            of the two inputs
        """
        return Signal(16)

    def process(self, i):
        """ process the input data (sums the values in the tuple) and returns it
        """
        return i.op1 + i.op2
+
+
class ExampleRecordHandshakeAddClass(SimpleHandshake):
    """ simple-handshake pipeline around the RecordObject adder stage """

    def __init__(self):
        SimpleHandshake.__init__(self, stage=ExampleAddRecordObjectStage())
+
+
+######################################################################
+# Test 23
+######################################################################
+
def iospecfnrecord():
    """ data spec for the record-based FIFO tests: a two-operand record """
    return Example2OpRecord()
+
class FIFOTestRecordControl(FIFOControl):
    """ depth-2 FIFO handshake around a record-based pass-through stage """

    def __init__(self):
        FIFOControl.__init__(self, 2, PassThroughStage(iospecfnrecord))
+
+
class ExampleFIFORecordObjectPipe(ControlBase):
    """ record-carrying FIFO stage feeding the record adder stage """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        first = FIFOTestRecordControl()
        second = ExampleRecordHandshakeAddClass()
        m.submodules.pipe1 = first
        m.submodules.pipe2 = second

        m.d.comb += self.connect([first, second])

        return m
+
+
+######################################################################
+# Test 24
+######################################################################
+
class FIFOTestRecordAddStageControl(FIFOControl):
    """ depth-2 FIFO handshake directly around the record adder stage
        (no pass-through wrapper) """

    def __init__(self):
        FIFOControl.__init__(self, 2, ExampleAddRecordObjectStage())
+
+
+
+######################################################################
+# Test 25
+######################################################################
+
class FIFOTestAdd16(FIFOControl):
    """ depth-2 FIFO handshake around the basic +1 stage """

    def __init__(self):
        FIFOControl.__init__(self, 2, ExampleStageCls())
+
+
class ExampleFIFOAdd2Pipe(ControlBase):
    """ two FIFO-based +1 stages chained, giving input + 2 overall """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        first = FIFOTestAdd16()
        second = FIFOTestAdd16()
        m.submodules.pipe1 = first
        m.submodules.pipe2 = second

        m.d.comb += self.connect([first, second])

        return m
+
+
+######################################################################
+# Test 26
+######################################################################
+
def iospecfn24():
    """ io specification function: a pair of named 16-bit signals
        (src1, src2).
    """
    return (Signal(16, name="src1"), Signal(16, name="src2"))
+
class FIFOTest2x16(FIFOControl):
    """ depth-2 FIFO control carrying a 2-signal payload """

    def __init__(self):
        # NOTE(review): this uses iospecfn2, not the iospecfn24 defined
        # just above - presumably iospecfn2 comes from earlier in the
        # file.  confirm that iospecfn24 is not the intended spec here.
        stage = PassThroughStage(iospecfn2)
        FIFOControl.__init__(self, 2, stage)
+
+
+######################################################################
+# Test 997
+######################################################################
+
class ExampleBufPassThruPipe2(ControlBase):
    """ buffered-handshake add stage chained into a pass-through add stage """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        # XXX currently fails: any other permutation works fine.
        # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok
        # also fails using UnbufferedPipeline as well
        #pipe1 = ExampleUnBufAdd1Pipe()
        #pipe2 = ExampleBufAdd1Pipe()
        buffered = ExampleBufAdd1Pipe()
        passthru = ExamplePassAdd1Pipe()

        m.submodules.pipe1 = buffered
        m.submodules.pipe2 = passthru

        m.d.comb += self.connect([buffered, passthru])

        return m
+
+
+######################################################################
+# Test 998
+######################################################################
+
class ExampleBufPipe3(ControlBase):
    """ Example of how to do delayed pipeline, where the stage signals
        whether it is ready.
    """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        delayed = ExampleBufDelayedPipe()
        plain = ExampleBufPipe()

        m.submodules.pipe1 = delayed
        m.submodules.pipe2 = plain

        m.d.comb += self.connect([delayed, plain])

        return m
+
+######################################################################
+# Test 999 - XXX FAILS
+# http://bugs.libre-riscv.org/show_bug.cgi?id=57
+######################################################################
+
class ExampleBufAdd1Pipe(BufferedHandshake):
    """ BufferedHandshake wrapper around the example stage """

    def __init__(self):
        BufferedHandshake.__init__(self, ExampleStageCls())
+
+
class ExampleUnBufAdd1Pipe(UnbufferedPipeline):
    """ UnbufferedPipeline wrapper around the example stage """

    def __init__(self):
        UnbufferedPipeline.__init__(self, ExampleStageCls())
+
+
class ExampleBufUnBufPipe(ControlBase):
    """ buffered stage chained into an unbuffered stage """

    def elaborate(self, platform):
        m = ControlBase.elaborate(self, platform)

        # XXX currently fails: any other permutation works fine.
        # p1=u,p2=b ok p1=u,p2=u ok p1=b,p2=b ok
        # also fails using UnbufferedPipeline as well
        #pipe1 = ExampleUnBufAdd1Pipe()
        #pipe2 = ExampleBufAdd1Pipe()
        buffered = ExampleBufAdd1Pipe()
        unbuffered = ExampleUnBufAdd1Pipe()

        m.submodules.pipe1 = buffered
        m.submodules.pipe2 = unbuffered

        m.d.comb += self.connect([buffered, unbuffered])

        return m
+
+
+######################################################################
+# Unit Tests
+######################################################################
+
num_tests = 10  # presumably the per-test vector count - confirm where used
+
if __name__ == '__main__':
    # manual test runner: each numbered section constructs a DUT, runs a
    # simulation (writing a .vcd trace) and/or converts the design to
    # RTLIL (.il) for inspection.
    if False:  # tests 1-10 disabled: flip to True to re-enable
        print ("test 1")
        dut = ExampleBufPipe()
        run_simulation(dut, tbench(dut), vcd_name="test_bufpipe.vcd")

        print ("test 2")
        dut = ExampleBufPipe2()
        run_simulation(dut, tbench2(dut), vcd_name="test_bufpipe2.vcd")
        ports = [dut.p.valid_i, dut.n.ready_i,
                 dut.n.valid_o, dut.p.ready_o] + \
                [dut.p.data_i] + [dut.n.data_o]
        vl = rtlil.convert(dut, ports=ports)
        with open("test_bufpipe2.il", "w") as f:
            f.write(vl)


        print ("test 3")
        dut = ExampleBufPipe()
        test = Test3(dut, resultfn_3)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd")

        print ("test 3.5")
        dut = ExamplePipeline()
        test = Test3(dut, resultfn_3)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd")

        print ("test 4")
        dut = ExampleBufPipe2()
        run_simulation(dut, tbench4(dut), vcd_name="test_bufpipe4.vcd")

        print ("test 5")
        dut = ExampleBufPipeAdd()
        test = Test5(dut, resultfn_5, stage_ctl=True)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd")

        print ("test 6")
        dut = ExampleLTPipeline()
        test = Test5(dut, resultfn_6)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd")

        ports = [dut.p.valid_i, dut.n.ready_i,
                 dut.n.valid_o, dut.p.ready_o] + \
                list(dut.p.data_i) + [dut.n.data_o]
        vl = rtlil.convert(dut, ports=ports)
        with open("test_ltcomb_pipe.il", "w") as f:
            f.write(vl)

        print ("test 7")
        dut = ExampleAddRecordPipe()
        data=data_dict()
        test = Test5(dut, resultfn_7, data=data)
        ports = [dut.p.valid_i, dut.n.ready_i,
                 dut.n.valid_o, dut.p.ready_o,
                 dut.p.data_i.src1, dut.p.data_i.src2,
                 dut.n.data_o.src1, dut.n.data_o.src2]
        vl = rtlil.convert(dut, ports=ports)
        with open("test_recordcomb_pipe.il", "w") as f:
            f.write(vl)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd")

        print ("test 8")
        dut = ExampleBufPipeAddClass()
        data=data_2op()
        test = Test5(dut, resultfn_8, data=data)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe8.vcd")

        print ("test 9")
        dut = ExampleBufPipeChain2()
        ports = [dut.p.valid_i, dut.n.ready_i,
                 dut.n.valid_o, dut.p.ready_o] + \
                [dut.p.data_i] + [dut.n.data_o]
        vl = rtlil.convert(dut, ports=ports)
        with open("test_bufpipechain2.il", "w") as f:
            f.write(vl)

        data = data_chain2()
        test = Test5(dut, resultfn_9, data=data)
        run_simulation(dut, [test.send, test.rcv],
                       vcd_name="test_bufpipechain2.vcd")

        print ("test 10")
        dut = ExampleLTBufferedPipeDerived()
        test = Test5(dut, resultfn_6)
        run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltbufpipe10.vcd")
        vl = rtlil.convert(dut, ports=ports)
        with open("test_ltbufpipe10.il", "w") as f:
            f.write(vl)

    print ("test 11")
    dut = ExampleAddRecordPlaceHolderPipe()
    data=data_placeholder()
    test = Test5(dut, resultfn_test11, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd")


    print ("test 12")
    dut = ExampleBufDelayedPipe()
    data = data_chain1()
    test = Test5(dut, resultfn_12, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe12.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufpipe12.il", "w") as f:
        f.write(vl)

    print ("test 13")
    dut = ExampleUnBufDelayedPipe()
    data = data_chain1()
    test = Test5(dut, resultfn_12, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe13.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_unbufpipe13.il", "w") as f:
        f.write(vl)

    print ("test 15")
    dut = ExampleBufModeAdd1Pipe()
    data = data_chain1()
    test = Test5(dut, resultfn_12, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf15.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufunbuf15.il", "w") as f:
        f.write(vl)

    print ("test 16")
    dut = ExampleBufModeUnBufPipe()
    data = data_chain1()
    test = Test5(dut, resultfn_9, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf16.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufunbuf16.il", "w") as f:
        f.write(vl)

    print ("test 17")
    dut = ExampleUnBufAdd1Pipe2()
    data = data_chain1()
    test = Test5(dut, resultfn_12, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_unbufpipe17.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_unbufpipe17.il", "w") as f:
        f.write(vl)

    print ("test 18")
    dut = PassThroughTest()
    data = data_chain1()
    test = Test5(dut, resultfn_identical, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_passthru18.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_passthru18.il", "w") as f:
        f.write(vl)

    print ("test 19")
    dut = ExampleBufPassThruPipe()
    data = data_chain1()
    test = Test5(dut, resultfn_9, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpass19.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufpass19.il", "w") as f:
        f.write(vl)

    print ("test 20")
    dut = FIFOTest16()
    data = data_chain1()
    test = Test5(dut, resultfn_identical, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifo20.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_fifo20.il", "w") as f:
        f.write(vl)

    print ("test 21")
    dut = ExampleFIFOPassThruPipe1()
    data = data_chain1()
    test = Test5(dut, resultfn_12, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_fifopass21.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_fifopass21.il", "w") as f:
        f.write(vl)

    print ("test 22")
    dut = ExampleRecordHandshakeAddClass()
    data=data_2op()
    test = Test5(dut, resultfn_8, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord22.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i.op1, dut.p.data_i.op2] + \
            [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_addrecord22.il", "w") as f:
        f.write(vl)

    print ("test 23")
    dut = ExampleFIFORecordObjectPipe()
    data=data_2op()
    test = Test5(dut, resultfn_8, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord23.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i.op1, dut.p.data_i.op2] + \
            [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_addrecord23.il", "w") as f:
        f.write(vl)

    print ("test 24")
    dut = FIFOTestRecordAddStageControl()
    data=data_2op()
    test = Test5(dut, resultfn_8, data=data)
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i.op1, dut.p.data_i.op2] + \
            [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_addrecord24.il", "w") as f:
        f.write(vl)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord24.vcd")

    print ("test 25")
    dut = ExampleFIFOAdd2Pipe()
    data = data_chain1()
    test = Test5(dut, resultfn_9, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_add2pipe25.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_add2pipe25.il", "w") as f:
        f.write(vl)

    print ("test 997")
    dut = ExampleBufPassThruPipe2()
    data = data_chain1()
    test = Test5(dut, resultfn_9, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpass997.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufpass997.il", "w") as f:
        f.write(vl)

    print ("test 998 (fails, bug)")
    dut = ExampleBufPipe3()
    data = data_chain1()
    test = Test5(dut, resultfn_9, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe14.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufpipe14.il", "w") as f:
        f.write(vl)

    print ("test 999 (expected to fail, which is a bug)")
    dut = ExampleBufUnBufPipe()
    data = data_chain1()
    test = Test5(dut, resultfn_9, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufunbuf999.vcd")
    ports = [dut.p.valid_i, dut.n.ready_i,
             dut.n.valid_o, dut.p.ready_o] + \
            [dut.p.data_i] + [dut.n.data_o]
    vl = rtlil.convert(dut, ports=ports)
    with open("test_bufunbuf999.il", "w") as f:
        f.write(vl)
+
--- /dev/null
+import sys
+from random import randint
+from random import seed
+from operator import truediv
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_div_experiment import FPDIV
+
+from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_case, check_case, run_test,
+ run_edge_cases, run_corner_cases)
+
+
def testbench(dut):
    """ FPDIV directed test: each check_case drives raw IEEE754
        single-precision bit-patterns a, b through the divider and checks
        the result z (also a raw bit-pattern), then runs regression,
        corner-case and edge-case vector sets.
    """
    # special values first: zero dividend/divisor cases produce quiet NaN
    # (0xffc00000); the third case underflows to -0 (0x80000000)
    yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000)
    yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000)
    yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000)
    # ordinary finite cases, e.g. 2.0/1.0=2.0, 1.0/2.0=0.5, 1.0/3.0, ...
    yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000)
    yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
    yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
    yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
    yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
    yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
    yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
    yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
    yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)

    count = 0

    #regression tests
    stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017]
    stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab]
    yield from run_test(dut, stimulus_a, stimulus_b, truediv, get_case)
    count += len(stimulus_a)
    print (count, "vectors passed")

    yield from run_corner_cases(dut, count, truediv, get_case)
    yield from run_edge_cases(dut, count, truediv, get_case)
+
+
if __name__ == '__main__':
    # single-precision (32-bit) divider under test
    dut = FPDIV(width=32)
    run_simulation(dut, testbench(dut), vcd_name="test_div.vcd")
+
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_div_experiment import FPDIV
+
class ORGate:
    """ trivial 2-input OR gate: x = a | b (combinatorial) """

    def __init__(self):
        self.a = Signal()
        self.b = Signal()
        self.x = Signal()

    def elaborate(self, platform=None):
        m = Module()
        ored = self.a | self.b
        m.d.comb += self.x.eq(ored)
        return m
+
def check_case(dut, a, b, z):
    """ drives one a / b case through the divider's stb/ack handshake:
        presents a then b, waits for out_z.stb, acks the output and then
        checks the result bit-pattern equals z.
    """
    yield dut.in_a.v.eq(a)
    yield dut.in_a.stb.eq(1)
    yield
    yield
    a_ack = (yield dut.in_a.ack)
    assert a_ack == 0
    yield dut.in_b.v.eq(b)
    yield dut.in_b.stb.eq(1)
    b_ack = (yield dut.in_b.ack)
    assert b_ack == 0

    while True:
        yield
        out_z_stb = (yield dut.out_z.stb)
        if not out_z_stb:
            continue
        # result is available: drop input strobes and pulse the output ack
        yield dut.in_a.stb.eq(0)
        yield dut.in_b.stb.eq(0)
        yield dut.out_z.ack.eq(1)
        yield
        yield dut.out_z.ack.eq(0)
        yield
        yield
        break

    out_z = yield dut.out_z.v
    assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
+
def testbench(dut):
    """ double-precision (64-bit) divider cases, raw bit-patterns:
        3.0 / 1.0 = 3.0 and 1.0 / 3.0 = 0.3333...
    """
    yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000,
                          0x4008000000000000)
    yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000,
                          0x3FD5555555555555)

    if False:
        # leftover single-precision (32-bit) cases, disabled: this DUT
        # is 64 bits wide
        yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
        yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
        yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
        yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
        yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
        yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
        yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
        yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
+
if __name__ == '__main__':
    # double-precision (64-bit) divider under test
    dut = FPDIV(width=64)
    run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd")
+
--- /dev/null
+from sfpy import Float32
+from nmigen.compat.sim import run_simulation
+from dual_add_experiment import ALU
+
+
def get_case(dut, a, b, c):
    """ feeds operands a, b, c (raw bit-patterns) into the three-input
        ALU via its stb/ack handshakes, waits for the z output strobe,
        and returns the raw result bit-pattern.
    """
    yield dut.a.v.eq(a)
    yield dut.a.stb.eq(1)
    yield
    yield
    a_ack = (yield dut.a.ack)
    assert a_ack == 0

    yield dut.a.stb.eq(0)

    yield dut.b.v.eq(b)
    yield dut.b.stb.eq(1)
    yield
    yield
    b_ack = (yield dut.b.ack)
    assert b_ack == 0

    yield dut.b.stb.eq(0)

    yield dut.c.v.eq(c)
    yield dut.c.stb.eq(1)
    yield
    yield
    c_ack = (yield dut.c.ack)
    assert c_ack == 0

    yield dut.c.stb.eq(0)

    yield dut.z.ack.eq(1)

    # spin until the result strobe goes active, then capture and de-ack
    while True:
        out_z_stb = (yield dut.z.stb)
        if not out_z_stb:
            yield
            continue

        out_z = yield dut.z.v

        yield dut.z.ack.eq(0)
        break

    return out_z
+
def check_case(dut, a, b, c, z):
    """ run one a, b, c case through the ALU and assert the result is z """
    result = yield from get_case(dut, a, b, c)
    assert result == z, "Output z 0x%x != 0x%x" % (result, z)
+
def testbench(dut):
    """ dual-add ALU cases (raw float32 bit-patterns); second case is
        presumably 1.0, 2.0, -2.0 -> 1.0 - confirm the ALU operation
        against dual_add_experiment.
    """
    yield from check_case(dut, 0, 0, 0, 0)
    yield from check_case(dut, 0x3F800000, 0x40000000, 0xc0000000, 0x3F800000)
+
if __name__ == '__main__':
    # 32-bit three-operand ALU under test
    dut = ALU(width=32)
    run_simulation(dut, testbench(dut), vcd_name="test_dual_add.vcd")
+
--- /dev/null
+""" key strategic example showing how to do multi-input fan-in into a
+ multi-stage pipeline, then multi-output fanout.
+
+ the multiplex ID from the fan-in is passed in to the pipeline, preserved,
+ and used as a routing ID on the fanout.
+"""
+
+from random import randint
+from math import log
+from nmigen import Module, Signal, Cat, Value
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from nmigen_add_experiment import (FPADDMuxInOut,)
+
+from sfpy import Float32
+
class InputTest:
    """ test-driver for the FP-add fan-in/fan-out DUT: send() queues
        random fp32 add vectors on reservation-station 'mid', rcv()
        checks that results come back in-order on output row 'mid'.
    """
    def __init__(self, dut):
        self.dut = dut
        self.di = {}        # di[mid][i] = (op1, op2) raw input patterns
        self.do = {}        # do[mid] = expected result bit-patterns, in order
        self.tlen = 10
        self.width = 32
        for mid in range(dut.num_rows):
            self.di[mid] = {}
            self.do[mid] = []
            for i in range(self.tlen):
                op1 = randint(0, (1<<self.width)-1)
                op2 = randint(0, (1<<self.width)-1)
                #op1 = 0x40900000
                #op2 = 0x40200000
                res = Float32(op1) + Float32(op2)
                self.di[mid][i] = (op1, op2)
                self.do[mid].append(res.bits)

    def send(self, mid):
        """ generator: sends all tlen vectors on input row 'mid' """
        for i in range(self.tlen):
            op1, op2 = self.di[mid][i]
            # bugfix: was module-global 'dut'; use the DUT given to us
            rs = self.dut.p[mid]
            yield rs.valid_i.eq(1)
            yield rs.data_i.a.eq(op1)
            yield rs.data_i.b.eq(op2)
            yield rs.data_i.mid.eq(mid)
            yield
            o_p_ready = yield rs.ready_o
            while not o_p_ready:
                yield
                o_p_ready = yield rs.ready_o

            fop1 = Float32(op1)
            fop2 = Float32(op2)
            res = fop1 + fop2
            print ("send", mid, i, hex(op1), hex(op2), hex(res.bits),
                           fop1, fop2, res)

            yield rs.valid_i.eq(0)
            # wait random period of time before queueing another value
            for i in range(randint(0, 3)):
                yield

        yield rs.valid_i.eq(0)
        yield

        print ("send ended", mid)

        ## wait random period of time before queueing another value
        #for i in range(randint(0, 3)):
        #    yield

        #send_range = randint(0, 3)
        #if send_range == 0:
        #    send = True
        #else:
        #    send = randint(0, send_range) != 0

    def rcv(self, mid):
        """ generator: receives and checks results on output row 'mid';
            results are expected strictly in-order (out_i is always 0 and
            the head of the expected list is popped on each match).
        """
        while True:
            #stall_range = randint(0, 3)
            #for j in range(randint(1,10)):
            #    stall = randint(0, stall_range) != 0
            #    yield self.dut.n[0].ready_i.eq(stall)
            #    yield
            n = self.dut.n[mid]
            yield n.ready_i.eq(1)
            yield
            o_n_valid = yield n.valid_o
            i_n_ready = yield n.ready_i
            if not o_n_valid or not i_n_ready:
                continue

            out_mid = yield n.data_o.mid
            out_z = yield n.data_o.z

            out_i = 0

            print ("recv", out_mid, hex(out_z), "expected",
                        hex(self.do[mid][out_i] ))

            # see if this output has occurred already, delete it if it has
            assert mid == out_mid, "out_mid %d not correct %d" % (out_mid, mid)
            assert self.do[mid][out_i] == out_z
            del self.do[mid][out_i]

            # check if there's any more outputs
            if len(self.do[mid]) == 0:
                break
        print ("recv ended", mid)
+
+
+
if __name__ == '__main__':
    # 32-bit FP adder with 4 reservation-station rows: emit RTLIL, then
    # simulate with interleaved senders/receivers for every row
    dut = FPADDMuxInOut(32, 4)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_fpadd_pipe.il", "w") as f:
        f.write(vl)
    #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")

    test = InputTest(dut)
    run_simulation(dut, [test.rcv(1), test.rcv(0),
                         test.rcv(3), test.rcv(2),
                         test.send(0), test.send(1),
                         test.send(3), test.send(2),
                        ],
                   vcd_name="test_fpadd_pipe.vcd")
+
--- /dev/null
+from random import randint
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from fpbase import FPNum
+
class FPNumModShiftMulti:
    """ test wrapper: applies FPNum.shift_down_multi to 'a' by the
        signed amount 'ediff' on every clock edge.
    """
    def __init__(self, width):
        self.a = FPNum(width)
        # signed shift amount, sized to the exponent width
        self.ediff = Signal((self.a.e_width, True))

    def elaborate(self, platform=None):

        m = Module()
        #m.d.sync += self.a.decode(self.a.v)
        m.d.sync += self.a.shift_down_multi(self.ediff)

        return m
+
def check_case(dut, width, e_width, m, e, i):
    """ loads mantissa m / exponent e, shifts down by i, and compares the
        DUT against a software model: exponent increases by i; mantissa
        is shifted right by i with the new LSB being the OR of every bit
        shifted through it (sticky bit).
    """
    yield dut.a.m.eq(m)
    yield dut.a.e.eq(e)
    yield dut.ediff.eq(i)
    yield
    yield

    out_m = yield dut.a.m
    out_e = yield dut.a.e
    ed = yield dut.ediff
    calc_e = (e + i)
    print (e, bin(m), out_e, calc_e, bin(out_m), i, ed)

    # model: upper bits shifted right by i, LSB = OR of bits 0..i of m
    calc_m = ((m >> (i+1)) << 1) | (m & 1)
    for l in range(i):
        if m & (1<<(l+1)):
            calc_m |= 1

    assert out_e == calc_e, "Output e 0x%x != expected 0x%x" % (out_e, calc_e)
    assert out_m == calc_m, "Output m 0x%x != expected 0x%x" % (out_m, calc_m)
+
def testbench(dut):
    """ 200 random mantissa/exponent pairs, each checked with every
        shift amount 0..31.
    """
    m_width = dut.a.m_width
    e_width = dut.a.e_width
    e_max = dut.a.e_max
    for j in range(200):
        m = randint(0, (1<<m_width)-1)
        # clear a random number of low bits to exercise the sticky logic
        zeros = randint(0, 31)
        for i in range(zeros):
            m &= ~(1<<i)
        e = randint(-e_max, e_max)
        for i in range(32):
            yield from check_case(dut, m_width, e_width, m, e, i)
+
if __name__ == '__main__':
    # exercises FPNum.shift_down_multi on a 32-bit FPNum
    dut = FPNumModShiftMulti(width=32)
    run_simulation(dut, testbench(dut), vcd_name="test_multishift.vcd")

    #dut = MultiShiftModL(width=32)
    #run_simulation(dut, testbench(dut), vcd_name="test_multishift.vcd")
+
--- /dev/null
+# IEEE Floating Point Divider (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Const, Cat, Elaboratable
+from nmigen.cli import main, verilog, rtlil
+from nmigen.compat.sim import run_simulation
+
+
+from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, FPBase, FPState
+from nmoperator import eq
+from singlepipe import SimpleHandshake, ControlBase
+from test_buf_pipe import data_chain2, Test5
+
+
class FPDIV(FPBase, Elaboratable):
    """ FSM-based pipeline-stage experiment.

        NOTE(review): despite the name this is not a divider: the FSM
        latches operand a and outputs it with the mantissa incremented
        by 1 (see the "add_1" state).  It exists to test wrapping an FSM
        in ready/valid pipeline handshaking.
    """

    def __init__(self, width):
        FPBase.__init__(self)
        self.width = width

        # stb/ack style operand input and result output
        self.p = FPOpIn(width)
        self.n = FPOpOut(width)

        self.p.data_i = self.ispec()
        self.n.data_o = self.ospec()

        self.states = []

    def ispec(self):
        """ input spec: a single width-bit operand """
        return Signal(self.width, name="a")

    def ospec(self):
        """ output spec: a single width-bit result """
        return Signal(self.width, name="z")

    def setup(self, m, i):
        m.d.comb += self.p.v.eq(i) # connect input

    def process(self, i):
        return self.n.v # return z output

    def add_state(self, state):
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPDiv
        """
        m = Module()

        # Latches
        a = FPNumIn(None, self.width, False)
        z = FPNumOut(self.width, False)

        m.submodules.p = self.p
        m.submodules.n = self.n
        m.submodules.a = a
        m.submodules.z = z

        m.d.comb += a.v.eq(self.p.v)

        with m.FSM() as fsm:

            # ******
            # gets operand a

            with m.State("get_a"):
                res = self.get_op(m, self.p, a, "add_1")
                m.d.sync += eq([a, self.p.ready_o], res)

            # ******
            # "work" stage: simply add 1 to the mantissa

            with m.State("add_1"):
                m.next = "pack"
                m.d.sync += [
                  z.s.eq(a.s), # sign
                  z.e.eq(a.e), # exponent
                  z.m.eq(a.m + 1), # mantissa
                ]

            # ******
            # pack stage

            with m.State("pack"):
                self.pack(m, z, "put_z")

            # ******
            # put_z stage

            with m.State("put_z"):
                self.put_z(m, z, self.n, "get_a")

        return m
+
class FPDIVPipe(ControlBase):
    """ wraps the FPDIV FSM in a ControlBase ready/valid pipeline
        interface, connecting the FSM's stb/ack ports to the pipeline's
        p (input) and n (output) sides.
    """

    def __init__(self, width):
        self.width = width
        self.fpdiv = FPDIV(width=width)
        ControlBase.__init__(self, self.fpdiv)

    def elaborate(self, platform):
        self.m = m = ControlBase.elaborate(self, platform)

        m.submodules.fpdiv = self.fpdiv

        # see if connecting to stb/ack works
        m.d.comb += self.fpdiv.p._connect_in(self.p)
        m.d.comb += self.fpdiv.n._connect_out(self.n, do_data=False)
        # data_r: presumably the registered output data created by
        # ControlBase.elaborate - TODO confirm
        m.d.comb += self.n.data_o.eq(self.data_r)

        return m
+
def resultfn(data_o, expected, i, o):
    """ result check for the FSM experiment: the stage adds one to its
        input, so the received data must equal expected + 1.
    """
    want = expected + 1
    assert data_o == want, \
        "%d-%d received data %x not match expected %x\n" \
        % (i, o, data_o, want)
+
+
+if __name__ == "__main__":
+ dut = FPDIVPipe(width=16)
+ data = data_chain2()
+ ports = dut.ports()
+ vl = rtlil.convert(dut, ports=ports)
+ with open("test_fsm_experiment.il", "w") as f:
+ f.write(vl)
+ test = Test5(dut, resultfn, data=data)
+ run_simulation(dut, [test.send, test.rcv],
+ vcd_name="test_fsm_experiment.vcd")
+
--- /dev/null
+""" key strategic example showing how to do multi-input fan-in into a
+ multi-stage pipeline, then multi-output fanout.
+
+ the multiplex ID from the fan-in is passed in to the pipeline, preserved,
+ and used as a routing ID on the fanout.
+"""
+
+from random import randint
+from math import log
+from nmigen import Module, Signal, Cat, Value, Elaboratable
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from multipipe import CombMultiOutPipeline, CombMuxOutPipe
+from multipipe import PriorityCombMuxInPipe
+from singlepipe import SimpleHandshake, RecordObject, Object
+
+
class PassData2(RecordObject):
    """ Record-based pass-through payload: routing id, index and data """
    def __init__(self):
        RecordObject.__init__(self)
        self.mid = Signal(2, reset_less=True)   # mux/routing id (0..3)
        self.idx = Signal(8, reset_less=True)   # sequence index
        self.data = Signal(16, reset_less=True) # payload
+
+
class PassData(Object):
    """ Object-based pass-through payload: routing id, index and data """
    def __init__(self):
        Object.__init__(self)
        self.mid = Signal(2, reset_less=True)   # mux/routing id (0..3)
        self.idx = Signal(8, reset_less=True)   # sequence index
        self.data = Signal(16, reset_less=True) # payload
+
+
+
class PassThroughStage:
    """ stage spec which passes its input straight through unchanged """

    def ispec(self):
        return PassData()

    def ospec(self):
        # output spec is identical to the input spec
        return self.ispec()

    def process(self, i):
        return i
+
+
+
class PassThroughPipe(SimpleHandshake):
    """ single SimpleHandshake (clock-synchronous) pass-through stage """
    def __init__(self):
        stage = PassThroughStage()
        SimpleHandshake.__init__(self, stage)
+
+
class InputTest:
    """ test-driver for the fan-in/fan-out pipeline: send() queues
        (mid, idx, data) values on input row 'mid', rcv() checks that
        the same data appears on output row 'mid' (matched by idx, so
        completion order does not matter).
    """
    def __init__(self, dut):
        self.dut = dut
        self.di = {}        # di[mid][i] = value to send
        self.do = {}        # do[mid][i] = value still expected back
        self.tlen = 100
        for mid in range(dut.num_rows):
            self.di[mid] = {}
            self.do[mid] = {}
            for i in range(self.tlen):
                self.di[mid][i] = randint(0, 255) + (mid<<8)
                self.do[mid][i] = self.di[mid][i]

    def send(self, mid):
        """ generator: sends all tlen values on input row 'mid' """
        for i in range(self.tlen):
            op2 = self.di[mid][i]
            # bugfix: was module-global 'dut'; use the DUT given to us
            rs = self.dut.p[mid]
            yield rs.valid_i.eq(1)
            yield rs.data_i.data.eq(op2)
            yield rs.data_i.idx.eq(i)
            yield rs.data_i.mid.eq(mid)
            yield
            o_p_ready = yield rs.ready_o
            while not o_p_ready:
                yield
                o_p_ready = yield rs.ready_o

            print ("send", mid, i, hex(op2))

            yield rs.valid_i.eq(0)
            # wait random period of time before queueing another value
            for i in range(randint(0, 3)):
                yield

        yield rs.valid_i.eq(0)
        yield

        print ("send ended", mid)

        ## wait random period of time before queueing another value
        #for i in range(randint(0, 3)):
        #    yield

        #send_range = randint(0, 3)
        #if send_range == 0:
        #    send = True
        #else:
        #    send = randint(0, send_range) != 0

    def rcv(self, mid):
        """ generator: receives and checks values on output row 'mid' """
        while True:
            #stall_range = randint(0, 3)
            #for j in range(randint(1,10)):
            #    stall = randint(0, stall_range) != 0
            #    yield self.dut.n[0].ready_i.eq(stall)
            #    yield
            n = self.dut.n[mid]
            yield n.ready_i.eq(1)
            yield
            o_n_valid = yield n.valid_o
            i_n_ready = yield n.ready_i
            if not o_n_valid or not i_n_ready:
                continue

            out_mid = yield n.data_o.mid
            out_i = yield n.data_o.idx
            out_v = yield n.data_o.data

            print ("recv", out_mid, out_i, hex(out_v))

            # see if this output has occurred already, delete it if it has
            assert mid == out_mid, "out_mid %d not correct %d" % (out_mid, mid)
            assert out_i in self.do[mid], "out_i %d not in array %s" % \
                                (out_i, repr(self.do[mid]))
            assert self.do[mid][out_i] == out_v # pass-through data
            del self.do[mid][out_i]

            # check if there's any more outputs
            if len(self.do[mid]) == 0:
                break
        print ("recv ended", mid)
+
+
class TestPriorityMuxPipe(PriorityCombMuxInPipe):
    """ fan-in: priority combinatorial mux over num_rows input ports """

    def __init__(self, num_rows):
        self.num_rows = num_rows
        PriorityCombMuxInPipe.__init__(self, PassThroughStage(),
                                       p_len=self.num_rows)
+
+
class OutputTest:
    """ drives the fan-out DUT's single input port with (data, mid)
        pairs, every mid guaranteed to occur at least once.

        NOTE(review): the original __init__ computed (data, mid) pairs
        but never stored them, so send() would fail indexing the empty
        self.di; the pairs are now recorded.  self.do is still left
        unpopulated here - confirm against the receiving side.
    """
    def __init__(self, dut):
        self.dut = dut
        self.di = []        # list of (data, mid) stimulus pairs
        self.do = {}        # expected outputs (not populated here)
        self.tlen = 100
        for i in range(self.tlen * dut.num_rows):
            # first num_rows iterations cover every mid once, then random
            if i < dut.num_rows:
                mid = i
            else:
                mid = randint(0, dut.num_rows-1)
            data = randint(0, 255) + (mid<<8)
            self.di.append((data, mid))  # bugfix: was computed and dropped

    def send(self):
        """ generator: sends every stimulus pair on the single input port """
        for i in range(self.tlen * self.dut.num_rows):
            op2, mid = self.di[i]
            # bugfix: was module-global 'dut'; use the DUT given to us
            rs = self.dut.p
            yield rs.valid_i.eq(1)
            yield rs.data_i.data.eq(op2)
            yield rs.data_i.mid.eq(mid)
            yield
            o_p_ready = yield rs.ready_o
            while not o_p_ready:
                yield
                o_p_ready = yield rs.ready_o

            print ("send", mid, i, hex(op2))

            yield rs.valid_i.eq(0)
            # wait random period of time before queueing another value
            for i in range(randint(0, 3)):
                yield

        yield rs.valid_i.eq(0)
+
+
class TestMuxOutPipe(CombMuxOutPipe):
    """ fan-out: combinatorial mux routing to num_rows output ports """

    def __init__(self, num_rows):
        self.num_rows = num_rows
        CombMuxOutPipe.__init__(self, PassThroughStage(),
                                n_len=self.num_rows)
+
+
class TestInOutPipe(Elaboratable):
    """ composite: fan-in mux -> two pass-through stages -> fan-out mux.
        exposes the fan-in's p array as this module's inputs and the
        fan-out's n array as its outputs, so externally it looks like a
        single pipeline with num_rows in/out ports.
    """
    def __init__(self, num_rows=4):
        self.num_rows = num_rows
        self.inpipe = TestPriorityMuxPipe(num_rows) # fan-in (combinatorial)
        self.pipe1 = PassThroughPipe()              # stage 1 (clock-sync)
        self.pipe2 = PassThroughPipe()              # stage 2 (clock-sync)
        self.outpipe = TestMuxOutPipe(num_rows)     # fan-out (combinatorial)

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.pipe1 = self.pipe1
        m.submodules.pipe2 = self.pipe2
        m.submodules.outpipe = self.outpipe

        # chain the handshakes: fan-in -> stage1 -> stage2 -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.pipe1.p)
        m.d.comb += self.pipe1.connect_to_next(self.pipe2)
        m.d.comb += self.pipe2.connect_to_next(self.outpipe)

        return m

    def ports(self):
        return self._ports
+
+
if __name__ == '__main__':
    # 4-row fan-in/fan-out pipeline: emit RTLIL, then simulate with
    # interleaved senders/receivers for every row
    dut = TestInOutPipe()
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_inoutmux_pipe.il", "w") as f:
        f.write(vl)
    #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")

    test = InputTest(dut)
    run_simulation(dut, [test.rcv(1), test.rcv(0),
                         test.rcv(3), test.rcv(2),
                         test.send(0), test.send(1),
                         test.send(3), test.send(2),
                        ],
                   vcd_name="test_inoutmux_pipe.vcd")
+
--- /dev/null
+from random import randint
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from inputgroup import InputGroup
+
+
def testbench(dut):
    """ directed test of the InputGroup reservation-station logic:
        feeds operand pairs into rows 1 and 2, then rows 0 and 3
        concurrently, checking mid (row-id) arbitration and operand
        pass-through on the single output port.
    """
    stb = yield dut.out_op.stb
    assert stb == 0
    ack = yield dut.out_op.ack
    assert ack == 0

    # set row 1 input 0
    yield dut.rs[1].in_op[0].eq(5)
    yield dut.rs[1].stb.eq(0b01) # strobe indicate 1st op ready
    #yield dut.rs[1].ack.eq(1)
    yield

    # check row 1 output (should be inactive)
    decode = yield dut.rs[1].out_decode
    assert decode == 0
    if False:
        op0 = yield dut.rs[1].out_op[0]
        op1 = yield dut.rs[1].out_op[1]
        assert op0 == 0 and op1 == 0

    # output should be inactive
    out_stb = yield dut.out_op.stb
    assert out_stb == 1

    # set row 0 input 1
    yield dut.rs[1].in_op[1].eq(6)
    yield dut.rs[1].stb.eq(0b11) # strobe indicate both ops ready

    # set acknowledgement of output... takes 1 cycle to respond
    yield dut.out_op.ack.eq(1)
    yield
    yield dut.out_op.ack.eq(0) # clear ack on output
    yield dut.rs[1].stb.eq(0) # clear row 1 strobe

    # output strobe should be active, MID should be 0 until "ack" is set...
    out_stb = yield dut.out_op.stb
    assert out_stb == 1
    out_mid = yield dut.mid
    assert out_mid == 0

    # ... and output should not yet be passed through either
    op0 = yield dut.out_op.v[0]
    op1 = yield dut.out_op.v[1]
    assert op0 == 0 and op1 == 0

    # wait for out_op.ack to activate...
    yield dut.rs[1].stb.eq(0b00) # set row 1 strobes to zero
    yield

    # *now* output should be passed through
    op0 = yield dut.out_op.v[0]
    op1 = yield dut.out_op.v[1]
    assert op0 == 5 and op1 == 6

    # set row 2 input
    yield dut.rs[2].in_op[0].eq(3)
    yield dut.rs[2].in_op[1].eq(4)
    yield dut.rs[2].stb.eq(0b11) # strobe indicate 1st op ready
    yield dut.out_op.ack.eq(1) # set output ack
    yield
    yield dut.rs[2].stb.eq(0) # clear row 2 strobe
    yield dut.out_op.ack.eq(0) # set output ack
    yield
    op0 = yield dut.out_op.v[0]
    op1 = yield dut.out_op.v[1]
    assert op0 == 3 and op1 == 4, "op0 %d op1 %d" % (op0, op1)
    out_mid = yield dut.mid
    assert out_mid == 2

    # set row 0 and 3 input simultaneously: row 0 should win arbitration
    yield dut.rs[0].in_op[0].eq(9)
    yield dut.rs[0].in_op[1].eq(8)
    yield dut.rs[0].stb.eq(0b11) # strobe indicate 1st op ready
    yield dut.rs[3].in_op[0].eq(1)
    yield dut.rs[3].in_op[1].eq(2)
    yield dut.rs[3].stb.eq(0b11) # strobe indicate 1st op ready

    # set acknowledgement of output... takes 1 cycle to respond
    yield dut.out_op.ack.eq(1)
    yield
    yield dut.rs[0].stb.eq(0) # clear row 1 strobe
    yield
    out_mid = yield dut.mid
    assert out_mid == 0, "out mid %d" % out_mid

    yield
    yield dut.rs[3].stb.eq(0) # clear row 1 strobe
    yield dut.out_op.ack.eq(0) # clear ack on output
    yield
    out_mid = yield dut.mid
    assert out_mid == 3, "out mid %d" % out_mid
+
+
class InputTest:
    """Drive random operand pairs into each reservation-station row and
    match them, in any arrival order, against the shared output."""

    def __init__(self, dut):
        self.dut = dut
        self.di = {}          # mid -> {index: input value}
        self.do = {}          # mid -> {index: expected output value}
        self.tlen = 10        # transactions per row
        for mid in range(dut.num_rows):
            self.di[mid] = {}
            self.do[mid] = {}
            for i in range(self.tlen):
                self.di[mid][i] = randint(0, 100)
                self.do[mid][i] = self.di[mid][i]

    def send(self, mid):
        """Generator: feed all queued operand pairs into row `mid`."""
        for i in range(self.tlen):
            op2 = self.di[mid][i]
            # BUG FIX: was the module-level `dut`; use the DUT handed to
            # this tester so it does not depend on a matching global.
            rs = self.dut.rs[mid]
            ack = yield rs.ack
            while not ack:
                yield
                ack = yield rs.ack
            yield rs.in_op[0].eq(i)
            yield rs.in_op[1].eq(op2)
            yield rs.stb.eq(0b11) # strobe indicate 1st op ready
            ack = yield rs.ack
            while ack:
                yield
                ack = yield rs.ack
            yield rs.stb.eq(0)

            # wait random period of time before queueing another value
            for i in range(randint(0, 8)):
                yield

    def recv(self):
        """Generator: drain the shared output, striking each result off
        the per-row expectation dict until all rows are empty."""
        while True:
            # BUG FIX: all `dut.` references below were the global `dut`.
            stb = yield self.dut.out_op.stb
            yield self.dut.out_op.ack.eq(0)
            while not stb:
                yield self.dut.out_op.ack.eq(1)
                yield
                stb = yield self.dut.out_op.stb

            stb = yield self.dut.out_op.stb
            while stb:
                yield
                stb = yield self.dut.out_op.stb
            mid = yield self.dut.mid
            out_i = yield self.dut.out_op.v[0]
            out_v = yield self.dut.out_op.v[1]

            # see if this output has occurred already, delete it if it has
            assert out_i in self.do[mid]
            assert self.do[mid][out_i] == out_v
            del self.do[mid][out_i]

            # stop once every per-row dictionary has been emptied
            if not any(self.do.values()):
                break
+
if __name__ == '__main__':
    # Single-stepped testbench on a 32-bit InputGroup; also dump RTLIL.
    dut = InputGroup(width=32)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_inputgroup.il", "w") as f:
        f.write(vl)
    run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")

    # Parallel send/recv stress test on a fresh 16-bit instance.
    dut = InputGroup(width=16)
    test = InputTest(dut)
    run_simulation(dut, [test.send(3), test.send(2),
                         test.send(1), test.send(0),
                         test.recv()],
                   vcd_name="test_inputgroup_parallel.vcd")
+
--- /dev/null
+import sys
+from random import randint
+from random import seed
+from operator import mul
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from fmul import FPMUL
+
+from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_case, check_case, run_test,
+ run_edge_cases, run_corner_cases)
+
+
def testbench(dut):
    """Exercise the FP32 multiplier: two known products, a batch of
    regression vectors, then the shared corner/edge sweeps."""
    known = [
        (0x40000000, 0x40000000, 0x40800000),   # 2.0 * 2.0 = 4.0
        (0x41400000, 0x40A00000, 0x42700000),   # 12.0 * 5.0 = 60.0
    ]
    for a, b, z in known:
        yield from check_case(dut, a, b, z)

    count = 0

    #regression tests
    stimulus_a = [0xba57711a, 0xbf9b1e94, 0x34082401, 0x5e8ef81,
                  0x5c75da81, 0x2b017]
    stimulus_b = [0xee1818c5, 0xc038ed3a, 0xb328cd45, 0x114f3db,
                  0x2f642a39, 0xff3807ab]
    yield from run_test(dut, stimulus_a, stimulus_b, mul, get_case)
    count += len(stimulus_a)
    print (count, "vectors passed")

    yield from run_corner_cases(dut, count, mul, get_case)
    yield from run_edge_cases(dut, count, mul, get_case)
+
+
if __name__ == '__main__':
    # 32-bit (single-precision) multiplier under test.
    dut = FPMUL(width=32)
    run_simulation(dut, testbench(dut), vcd_name="test_mul.vcd")
+
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+from operator import mul
+
+from fmul import FPMUL
+
+import sys
+import atexit
+from random import randint
+from random import seed
+
+from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_case, check_case, run_test,
+ run_edge_cases, run_corner_cases)
+
+
def testbench(dut):
    """Drive the FP64 multiplier: one trivial case, two regression
    vectors, then the shared corner/edge sweeps."""
    yield from check_case(dut, 0, 0, 0)

    vectors = 0   # running count of stimuli applied so far

    #regression tests
    stimulus_a = [0xff80000000000000, 0x3351099a0528e138]
    stimulus_b = [0x7f80000000000000, 0xd651a9a9986af2b5]
    yield from run_test(dut, stimulus_a, stimulus_b, mul)
    vectors += len(stimulus_a)
    print (vectors, "vectors passed")

    yield from run_corner_cases(dut, vectors, mul)
    yield from run_edge_cases(dut, vectors, mul)
+
+
if __name__ == '__main__':
    # 64-bit (double-precision) multiplier under test.
    dut = FPMUL(width=64)
    run_simulation(dut, testbench(dut), vcd_name="test_mul64.vcd")
+
--- /dev/null
+from random import randint
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from fpbase import MultiShift, MultiShiftR, MultiShiftRMerge
+
class MultiShiftModL:
    """Combinatorial wrapper around MultiShift's lshift helper.

    a: value to shift, b: shift amount, x: the shifted result.
    """
    def __init__(self, width):
        self.ms = MultiShift(width)
        self.a = Signal(width)
        self.b = Signal(self.ms.smax)
        self.x = Signal(width)

    def elaborate(self, platform=None):
        m = Module()
        shifted = self.ms.lshift(self.a, self.b)
        m.d.comb += self.x.eq(shifted)
        return m
+
class MultiShiftModR:
    """Combinatorial wrapper around MultiShift's rshift helper.

    a: value to shift, b: shift amount, x: the shifted result.
    """
    def __init__(self, width):
        self.ms = MultiShift(width)
        self.a = Signal(width)
        self.b = Signal(self.ms.smax)
        self.x = Signal(width)

    def elaborate(self, platform=None):
        m = Module()
        shifted = self.ms.rshift(self.a, self.b)
        m.d.comb += self.x.eq(shifted)
        return m
+
class MultiShiftModRMod:
    """Wrap the MultiShiftR submodule: drive its i/s inputs from a/b
    and mirror its o output on x."""
    def __init__(self, width):
        self.ms = MultiShiftR(width)
        self.a = Signal(width)
        self.b = Signal(self.ms.smax)
        self.x = Signal(width)

    def elaborate(self, platform=None):
        m = Module()
        m.submodules += self.ms
        m.d.comb += [self.ms.i.eq(self.a),
                     self.ms.s.eq(self.b),
                     self.x.eq(self.ms.o)]
        return m
+
class MultiShiftRMergeMod:
    """Wrap the MultiShiftRMerge submodule: drive its inp/diff inputs
    from a/b and mirror its m output on x."""
    def __init__(self, width):
        self.ms = MultiShiftRMerge(width)
        self.a = Signal(width)
        self.b = Signal(self.ms.smax)
        self.x = Signal(width)

    def elaborate(self, platform=None):
        m = Module()
        m.submodules += self.ms
        m.d.comb += [self.ms.inp.eq(self.a),
                     self.ms.diff.eq(self.b),
                     self.x.eq(self.ms.m)]
        return m
+
+
def check_case(dut, width, a, b):
    """Apply (a, b) to the left-shift DUT and compare against a Python
    model: (a << b) truncated to `width` bits."""
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield

    expected = (a << b) & ((1 << width) - 1)

    actual = yield dut.x
    assert actual == expected, \
        "Output x 0x%x not equal to expected 0x%x" % (actual, expected)
+
def check_caser(dut, width, a, b):
    """Apply (a, b) to the right-shift DUT and compare against a Python
    model: (a >> b) truncated to `width` bits."""
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield

    expected = (a >> b) & ((1 << width) - 1)

    actual = yield dut.x
    assert actual == expected, \
        "Output x 0x%x not equal to expected 0x%x" % (actual, expected)
+
+
def check_case_merge(dut, width, a, b):
    """Model of shift-right-and-merge: right shift with any bit shifted
    out ORed into the result LSB as a "sticky" bit."""
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield

    x = (a >> b) & ((1<<width)-1) # actual shift
    # NOTE(review): (2<<b)-1 masks b+1 low bits, one more than the b bits
    # actually shifted out -- presumably mirrors an extra guard bit in
    # MultiShiftRMerge; confirm against the hardware implementation.
    if (a & ((2<<b)-1)) != 0: # mask for sticky bit
        x |= 1 # set LSB

    out_x = yield dut.x
    assert out_x == x, \
        "\nshift %d\nInput\n%+32s\nOutput x\n%+32s != \n%+32s" % \
        (b, bin(a), bin(out_x), bin(x))
+
def testmerge(dut):
    """1000 random inputs at every shift amount 0..31, checked against
    the sticky-merge model."""
    for shift in range(32):
        for _ in range(1000):
            val = randint(0, (1 << 32) - 1)
            yield from check_case_merge(dut, 32, val, shift)
+
def testbench(dut):
    """1000 random inputs at every shift amount 0..31, checked against
    the left-shift model."""
    for shift in range(32):
        for _ in range(1000):
            val = randint(0, (1 << 32) - 1)
            yield from check_case(dut, 32, val, shift)
+
def testbenchr(dut):
    """1000 random inputs at every shift amount 0..31, checked against
    the right-shift model."""
    for shift in range(32):
        for _ in range(1000):
            val = randint(0, (1 << 32) - 1)
            yield from check_caser(dut, 32, val, shift)
+
if __name__ == '__main__':
    # BUG FIX: the three non-merge simulations all wrote to the same
    # "test_multishift.vcd", so only the last trace survived.  Each
    # simulation now gets its own VCD file.
    dut = MultiShiftRMergeMod(width=32)
    run_simulation(dut, testmerge(dut), vcd_name="test_multishiftmerge.vcd")

    dut = MultiShiftModRMod(width=32)
    run_simulation(dut, testbenchr(dut), vcd_name="test_multishift_rmod.vcd")

    dut = MultiShiftModR(width=32)
    run_simulation(dut, testbenchr(dut), vcd_name="test_multishift_r.vcd")

    dut = MultiShiftModL(width=32)
    run_simulation(dut, testbench(dut), vcd_name="test_multishift_l.vcd")
+
--- /dev/null
+from random import randint
+from math import log
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from multipipe import CombMuxOutPipe
+from singlepipe import SimpleHandshake, PassThroughHandshake, RecordObject
+
+
class PassInData(RecordObject):
    """Input record for the fan-out test: a 2-bit routing id (`mid`)
    selecting the output port, plus 16 bits of payload."""
    def __init__(self):
        RecordObject.__init__(self)
        self.mid = Signal(2, reset_less=True)    # output-port selector
        self.data = Signal(16, reset_less=True)  # payload
+
+
class PassThroughStage:
    """Stage spec whose process() forwards only the payload field."""

    def ispec(self):
        """Input: a PassInData record (mid + data)."""
        return PassInData()

    def ospec(self, name):
        """Output: a bare 16-bit signal named after the port."""
        return Signal(16, name="%s_dout" % name, reset_less=True)

    def process(self, i):
        """Strip the routing id; pass the payload through unchanged."""
        return i.data
+
+
class PassThroughDataStage:
    """Identity stage: output spec and data are exactly the input's."""

    def ispec(self):
        return PassInData()

    def ospec(self):
        # Output carries the identical record layout as the input.
        return self.ispec()

    def process(self, i):
        # Identity: hand the record through untouched.
        return i
+
+
+
class PassThroughPipe(PassThroughHandshake):
    """Single handshake pipe stage wrapping PassThroughDataStage."""
    def __init__(self):
        PassThroughHandshake.__init__(self, PassThroughDataStage())
+
+
class OutputTest:
    """Feed (data, mid) pairs into the DUT's single input and check
    each routed output stream delivers its entries in order."""

    def __init__(self, dut):
        self.dut = dut
        self.di = []     # (data, mid) tuples in send order
        self.do = {}     # mid -> expected data values, in arrival order
        self.tlen = 10
        for i in range(self.tlen * dut.num_rows):
            if i < dut.num_rows:
                mid = i                        # hit every port once first
            else:
                mid = randint(0, dut.num_rows-1)
            data = randint(0, 255) + (mid<<8)  # tag payload with its port
            if mid not in self.do:
                self.do[mid] = []
            self.di.append((data, mid))
            self.do[mid].append(data)

    def send(self):
        """Generator: push every queued (data, mid) through the input,
        with random gaps between transactions."""
        # BUG FIX: this method used the module-level `dut` rather than
        # the DUT this tester was constructed with.
        for i in range(self.tlen * self.dut.num_rows):
            op2, mid = self.di[i]
            rs = self.dut.p
            yield rs.valid_i.eq(1)
            yield rs.data_i.data.eq(op2)
            yield rs.data_i.mid.eq(mid)
            yield
            o_p_ready = yield rs.ready_o
            while not o_p_ready:
                yield
                o_p_ready = yield rs.ready_o

            print ("send", mid, i, hex(op2))

            yield rs.valid_i.eq(0)
            # wait random period of time before queueing another value
            for i in range(randint(0, 3)):
                yield

        yield rs.valid_i.eq(0)

    def rcv(self, mid):
        """Generator: consume output port `mid`, stalling randomly to
        exercise back-pressure, until all expected data is seen."""
        out_i = 0
        count = 0
        stall_range = randint(0, 3)
        while out_i != len(self.do[mid]):
            count += 1
            assert count != 2000, "timeout: too long"
            n = self.dut.n[mid]
            yield n.ready_i.eq(1)
            yield
            o_n_valid = yield n.valid_o
            i_n_ready = yield n.ready_i
            if not o_n_valid or not i_n_ready:
                continue

            out_v = yield n.data_o

            print ("recv", mid, out_i, hex(out_v))

            assert self.do[mid][out_i] == out_v # pass-through data

            out_i += 1

            # occasionally drop ready for a few cycles
            if randint(0, 5) == 0:
                stall_range = randint(0, 3)
                stall = randint(0, stall_range) != 0
                if stall:
                    yield n.ready_i.eq(0)
                    for i in range(stall_range):
                        yield
+
+
class TestPriorityMuxPipe(CombMuxOutPipe):
    """Fan-out mux: one input stream routed to num_rows outputs."""
    def __init__(self, num_rows):
        self.num_rows = num_rows
        stage = PassThroughStage()
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
+
+
class TestSyncToPriorityPipe(Elaboratable):
    """A PassThroughPipe feeding a 4-way CombMuxOutPipe fan-out.

    Exposes the pipe's producer side as `p` and the mux's array of
    consumer sides as `n`.
    """
    def __init__(self):
        self.num_rows = 4
        self.pipe = PassThroughPipe()
        self.muxpipe = TestPriorityMuxPipe(self.num_rows)

        self.p = self.pipe.p
        self.n = self.muxpipe.n

    def elaborate(self, platform):
        m = Module()
        m.submodules.pipe = self.pipe
        m.submodules.muxpipe = self.muxpipe
        # link the pass-through stage's output into the mux's input
        m.d.comb += self.pipe.n.connect_to_next(self.muxpipe.p)
        return m

    def ports(self):
        # input-side handshake + data, then each output port's signals
        res = [self.p.valid_i, self.p.ready_o] + \
                self.p.data_i.ports()
        for i in range(len(self.n)):
            res += [self.n[i].ready_i, self.n[i].valid_o] + \
                    [self.n[i].data_o]
                    #self.n[i].data_o.ports()
        return res
+
+
if __name__ == '__main__':
    dut = TestSyncToPriorityPipe()
    # Dump RTLIL for inspection before simulating.
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_outmux_pipe.il", "w") as f:
        f.write(vl)

    # One receiver per output port (order shuffled) plus one sender.
    test = OutputTest(dut)
    run_simulation(dut, [test.rcv(1), test.rcv(0),
                         test.rcv(3), test.rcv(2),
                         test.send()],
                   vcd_name="test_outmux_pipe.vcd")
+
--- /dev/null
+from random import randint
+from math import log
+from nmigen import Module, Signal, Cat
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from singlepipe import PassThroughStage
+from multipipe import (CombMultiInPipeline, PriorityCombMuxInPipe)
+
+
class PassData:
    """Test payload: 2-bit routing id, 6-bit sequence index, 16-bit data."""

    def __init__(self):
        self.mid = Signal(2, reset_less=True)
        self.idx = Signal(6, reset_less=True)
        self.data = Signal(16, reset_less=True)

    def eq(self, i):
        """Return the assignments copying every field from record `i`."""
        return [dst.eq(src) for dst, src in
                zip(self.ports(), [i.mid, i.idx, i.data])]

    def ports(self):
        return [self.mid, self.idx, self.data]
+
+
def testbench(dut):
    """Hand-stepped test copied from the InputGroup arbiter testbench.

    NOTE(review): this drives ``dut.rs``/``dut.out_op``, which the
    TestPriorityMuxPipe DUT in this file does not expose; the
    run_simulation call for it in __main__ is commented out, so this
    appears to be dead code carried over from test_inputgroup.
    """
    stb = yield dut.out_op.stb
    assert stb == 0
    ack = yield dut.out_op.ack
    assert ack == 0

    # set row 1 input 0
    yield dut.rs[1].in_op[0].eq(5)
    yield dut.rs[1].stb.eq(0b01) # strobe indicate 1st op ready
    #yield dut.rs[1].ack.eq(1)
    yield

    # check row 1 output (should be inactive)
    decode = yield dut.rs[1].out_decode
    assert decode == 0
    if False:
        op0 = yield dut.rs[1].out_op[0]
        op1 = yield dut.rs[1].out_op[1]
        assert op0 == 0 and op1 == 0

    # output should be inactive
    # NOTE(review): comment and assertion disagree (checks stb == 1).
    out_stb = yield dut.out_op.stb
    assert out_stb == 1

    # set row 0 input 1
    yield dut.rs[1].in_op[1].eq(6)
    yield dut.rs[1].stb.eq(0b11) # strobe indicate both ops ready

    # set acknowledgement of output... takes 1 cycle to respond
    yield dut.out_op.ack.eq(1)
    yield
    yield dut.out_op.ack.eq(0) # clear ack on output
    yield dut.rs[1].stb.eq(0) # clear row 1 strobe

    # output strobe should be active, MID should be 0 until "ack" is set...
    out_stb = yield dut.out_op.stb
    assert out_stb == 1
    out_mid = yield dut.mid
    assert out_mid == 0

    # ... and output should not yet be passed through either
    op0 = yield dut.out_op.v[0]
    op1 = yield dut.out_op.v[1]
    assert op0 == 0 and op1 == 0

    # wait for out_op.ack to activate...
    yield dut.rs[1].stb.eq(0b00) # set row 1 strobes to zero
    yield

    # *now* output should be passed through
    op0 = yield dut.out_op.v[0]
    op1 = yield dut.out_op.v[1]
    assert op0 == 5 and op1 == 6

    # set row 2 input
    yield dut.rs[2].in_op[0].eq(3)
    yield dut.rs[2].in_op[1].eq(4)
    yield dut.rs[2].stb.eq(0b11) # strobe indicate 1st op ready
    yield dut.out_op.ack.eq(1) # set output ack
    yield
    yield dut.rs[2].stb.eq(0) # clear row 2 strobe
    yield dut.out_op.ack.eq(0) # set output ack
    yield
    op0 = yield dut.out_op.v[0]
    op1 = yield dut.out_op.v[1]
    assert op0 == 3 and op1 == 4, "op0 %d op1 %d" % (op0, op1)
    out_mid = yield dut.mid
    assert out_mid == 2

    # set row 0 and 3 input
    yield dut.rs[0].in_op[0].eq(9)
    yield dut.rs[0].in_op[1].eq(8)
    yield dut.rs[0].stb.eq(0b11) # strobe indicate 1st op ready
    yield dut.rs[3].in_op[0].eq(1)
    yield dut.rs[3].in_op[1].eq(2)
    yield dut.rs[3].stb.eq(0b11) # strobe indicate 1st op ready

    # set acknowledgement of output... takes 1 cycle to respond
    yield dut.out_op.ack.eq(1)
    yield
    yield dut.rs[0].stb.eq(0) # clear row 1 strobe
    yield
    out_mid = yield dut.mid
    assert out_mid == 0, "out mid %d" % out_mid

    yield
    yield dut.rs[3].stb.eq(0) # clear row 1 strobe
    yield dut.out_op.ack.eq(0) # clear ack on output
    yield
    out_mid = yield dut.mid
    assert out_mid == 3, "out mid %d" % out_mid
+
+
class InputTest:
    """Drive numbered payloads into each of the DUT's input ports and
    verify every one re-emerges, tagged with its mid, on the single
    shared output."""

    def __init__(self, dut):
        self.dut = dut
        self.di = {}      # mid -> {idx: payload}
        self.do = {}      # mid -> {idx: expected payload}
        self.tlen = 10
        for mid in range(dut.num_rows):
            self.di[mid] = {}
            self.do[mid] = {}
            for i in range(self.tlen):
                # payload carries its source port in the high byte
                self.di[mid][i] = randint(0, 100) + (mid<<8)
                self.do[mid][i] = self.di[mid][i]

    def send(self, mid):
        """Generator: stream all payloads into port `mid`, with random
        gaps between transactions."""
        for i in range(self.tlen):
            op2 = self.di[mid][i]
            # BUG FIX: was the module-level `dut`; use the DUT we hold.
            rs = self.dut.p[mid]
            yield rs.valid_i.eq(1)
            yield rs.data_i.data.eq(op2)
            yield rs.data_i.idx.eq(i)
            yield rs.data_i.mid.eq(mid)
            yield
            o_p_ready = yield rs.ready_o
            while not o_p_ready:
                yield
                o_p_ready = yield rs.ready_o

            print ("send", mid, i, hex(op2))

            yield rs.valid_i.eq(0)
            # wait random period of time before queueing another value
            for i in range(randint(0, 3)):
                yield

        yield rs.valid_i.eq(0)

    def rcv(self):
        """Generator: drain the shared output until every expected
        (mid, idx) pair has been seen exactly once."""
        while True:
            n = self.dut.n
            yield n.ready_i.eq(1)
            yield
            o_n_valid = yield n.valid_o
            i_n_ready = yield n.ready_i
            if not o_n_valid or not i_n_ready:
                continue

            mid = yield n.data_o.mid
            out_i = yield n.data_o.idx
            out_v = yield n.data_o.data

            print ("recv", mid, out_i, hex(out_v))

            # see if this output has occurred already, delete it if it has
            assert out_i in self.do[mid], "out_i %d not in array %s" % \
                                (out_i, repr(self.do[mid]))
            assert self.do[mid][out_i] == out_v # pass-through data
            del self.do[mid][out_i]

            # stop once every per-mid dictionary has been emptied
            if not any(self.do.values()):
                break
+
+
class TestPriorityMuxPipe(PriorityCombMuxInPipe):
    """4-way priority fan-in of PassData records into one stream."""
    def __init__(self):
        self.num_rows = 4
        def iospecfn(): return PassData()
        stage = PassThroughStage(iospecfn)
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
+
+
if __name__ == '__main__':
    dut = TestPriorityMuxPipe()
    # Dump RTLIL for inspection before simulating.
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_inputgroup_multi.il", "w") as f:
        f.write(vl)
    #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")

    # Four concurrent senders (order shuffled) and one shared receiver.
    test = InputTest(dut)
    run_simulation(dut, [test.send(1), test.send(0),
                         test.send(3), test.send(2),
                         test.rcv()],
                   vcd_name="test_inputgroup_multi.vcd")
+
--- /dev/null
+from random import randint
+from random import seed
+from operator import add
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from fadd_state import FPADD
+
+from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
+ is_inf, is_pos_inf, is_neg_inf,
+ match, get_case, check_case, run_test,
+ run_edge_cases, run_corner_cases)
+
def testbench(dut):
    """FP32 adder state-machine test: a table of known (a, b, sum) bit
    patterns, regression vectors, then the shared corner/edge sweeps."""
    known_cases = [
        (0xFFFFFFFF, 0xC63B800A, 0xFFC00000),
        (0xFF800000, 0x7F800000, 0xFFC00000),
        #(0xFF800000, 0x7F800000, 0x7FC00000),
        (0x7F800000, 0xFF800000, 0xFFC00000),
        (0x42540000, 0xC2540000, 0x00000000),
        (0xC2540000, 0x42540000, 0x00000000),
        (0xfe34f995, 0xff5d59ad, 0xff800000),
        (0x82471f51, 0x243985f, 0x801c3790),
        (0, 0, 0),
        (0x40000000, 0xc0000000, 0x00000000),
        (0x3F800000, 0x40000000, 0x40400000),
        (0x40000000, 0x3F800000, 0x40400000),
        (0x447A0000, 0x4488B000, 0x4502D800),
        (0x463B800A, 0x42BA8A3D, 0x463CF51E),
        (0x42BA8A3D, 0x463B800A, 0x463CF51E),
        (0x463B800A, 0xC2BA8A3D, 0x463A0AF6),
        (0xC2BA8A3D, 0x463B800A, 0x463A0AF6),
        (0xC63B800A, 0x42BA8A3D, 0xC63A0AF6),
        (0x42BA8A3D, 0xC63B800A, 0xC63A0AF6),
        (0x7F800000, 0x00000000, 0x7F800000),
        (0x00000000, 0x7F800000, 0x7F800000),
        (0xFF800000, 0x00000000, 0xFF800000),
        (0x00000000, 0xFF800000, 0xFF800000),
        (0x7F800000, 0x7F800000, 0x7F800000),
        (0xFF800000, 0xFF800000, 0xFF800000),
        (0x00018643, 0x00FA72A4, 0x00FBF8E7),
        (0x001A2239, 0x00FA72A4, 0x010A4A6E),
        (0x3F7FFFFE, 0x3F7FFFFE, 0x3FFFFFFE),
        (0x7EFFFFEE, 0x7EFFFFEE, 0x7F7FFFEE),
        (0x7F7FFFEE, 0xFEFFFFEE, 0x7EFFFFEE),
        (0x7F7FFFEE, 0x756CA884, 0x7F7FFFFD),
        (0x7F7FFFEE, 0x758A0CF8, 0x7F7FFFFF),
        (0x42500000, 0x51A7A358, 0x51A7A358),
        (0x51A7A358, 0x42500000, 0x51A7A358),
        (0x4E5693A4, 0x42500000, 0x4E5693A5),
        (0x42500000, 0x4E5693A4, 0x4E5693A5),
        #(1, 0, 1),
        #(1, 1, 1),
    ]
    for a, b, z in known_cases:
        yield from check_case(dut, a, b, z)

    count = 0

    #regression tests
    stimulus_a = [0x22cb525a, 0x40000000, 0x83e73d5c, 0xbf9b1e94, 0x34082401,
                  0x5e8ef81, 0x5c75da81, 0x2b017]
    stimulus_b = [0xadd79efa, 0xC0000000, 0x1c800000, 0xc038ed3a, 0xb328cd45,
                  0x114f3db, 0x2f642a39, 0xff3807ab]
    yield from run_test(dut, stimulus_a, stimulus_b, add, get_case)
    count += len(stimulus_a)
    print (count, "vectors passed")

    yield from run_corner_cases(dut, count, add, get_case)
    yield from run_edge_cases(dut, count, add, get_case)
+
if __name__ == '__main__':
    # single_cycle=True selects the one-shot adder state machine.
    dut = FPADD(width=32, single_cycle=True)
    run_simulation(dut, testbench(dut), vcd_name="test_state_add.vcd")
+
--- /dev/null
+from random import randint
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog
+
+from inputgroup import FPGetSyncOpsMod
+
+
def testbench(dut):
    """Drive FPGetSyncOpsMod directly: outputs should appear only while
    both operand strobes and the ack are asserted together."""
    stb = yield dut.stb
    assert stb == 0
    ack = yield dut.ack
    assert ack == 0

    # present only the first operand: outputs must stay at zero
    yield dut.in_op[0].eq(5)
    yield dut.stb.eq(0b01)
    yield dut.ack.eq(1)
    yield
    yield
    decode = yield dut.out_decode
    assert decode == 0

    op0 = yield dut.out_op[0]
    op1 = yield dut.out_op[1]
    assert op0 == 0 and op1 == 0

    # present the second operand: both values pass through
    yield dut.in_op[1].eq(6)
    yield dut.stb.eq(0b11)
    yield
    yield

    op0 = yield dut.out_op[0]
    op1 = yield dut.out_op[1]
    assert op0 == 5 and op1 == 6

    # dropping ack clears the outputs again
    yield dut.ack.eq(0)
    yield

    op0 = yield dut.out_op[0]
    op1 = yield dut.out_op[1]
    assert op0 == 0 and op1 == 0
+
if __name__ == '__main__':
    dut = FPGetSyncOpsMod(width=32)
    run_simulation(dut, testbench(dut), vcd_name="test_getsyncops.vcd")
    # Also emit Verilog for the module.
    vl = verilog.convert(dut, ports=dut.ports())
    with open("test_getsyncops.v", "w") as f:
        f.write(vl)
--- /dev/null
+import sys
+from random import randint
+from random import seed
+
+from sfpy import Float64
+
def get_mantissa(x):
    """Low 52 bits: the fraction field of an IEEE-754 binary64 value."""
    return x & ((1 << 52) - 1)
+
def get_exponent(x):
    """Unbiased exponent: bits [62:52] minus the binary64 bias of 1023."""
    return ((x >> 52) & 0x7ff) - 1023
+
def get_sign(x):
    """Sign bit (bit 63): 0 for positive, 1 for negative."""
    return (x >> 63) & 1
+
def is_nan(x):
    """NaN: all-ones exponent field with a nonzero fraction."""
    if get_exponent(x) != 1024:
        return False
    return get_mantissa(x) != 0
+
def is_inf(x):
    """Infinity: all-ones exponent field with a zero fraction."""
    if get_exponent(x) != 1024:
        return False
    return get_mantissa(x) == 0
+
def is_pos_inf(x):
    """Positive infinity: infinity with the sign bit clear."""
    if not is_inf(x):
        return False
    return not get_sign(x)
+
def is_neg_inf(x):
    """Negative infinity: infinity with the sign bit set."""
    if not is_inf(x):
        return False
    return get_sign(x)
+
def match(x, y):
    """True when x and y are equivalent FP results: bit-equal, the same
    infinity, or both NaN (any payload)."""
    if x == y:
        return True
    if is_nan(x) and is_nan(y):
        return True
    if is_pos_inf(x) and is_pos_inf(y):
        return True
    return is_neg_inf(x) and is_neg_inf(y)
+
def get_case(dut, a, b):
    """Generator: run one operation through the stb/ack interface.

    Presents operands `a` and `b`, waits for out_z.stb, completes the
    out_z ack handshake, and returns the result bits.
    """
    yield dut.in_a.v.eq(a)
    yield dut.in_a.stb.eq(1)
    yield
    yield
    a_ack = (yield dut.in_a.ack)
    assert a_ack == 0
    yield dut.in_b.v.eq(b)
    yield dut.in_b.stb.eq(1)
    b_ack = (yield dut.in_b.ack)
    assert b_ack == 0

    # wait for the result strobe, then complete the output handshake
    while True:
        yield
        out_z_stb = (yield dut.out_z.stb)
        if not out_z_stb:
            continue
        yield dut.in_a.stb.eq(0)
        yield dut.in_b.stb.eq(0)
        yield dut.out_z.ack.eq(1)
        yield
        yield dut.out_z.ack.eq(0)
        yield
        yield
        break

    out_z = yield dut.out_z.v
    return out_z
+
def check_case(dut, a, b, z):
    """Run one (a, b) pair and assert the DUT result equals z exactly."""
    result = yield from get_case(dut, a, b)
    assert result == z, "Output z 0x%x not equal to expected 0x%x" % (result, z)
+
+
def run_test(dut, stimulus_a, stimulus_b, op):
    """Feed paired 64-bit stimuli through the DUT and compare each
    result against sfpy's Float64 reference for `op`.

    On mismatch, prints a field-by-field breakdown of both operands and
    of the expected and actual results, then aborts the simulation.
    """
    expected_responses = []
    actual_responses = []
    for a, b in zip(stimulus_a, stimulus_b):
        af = Float64.from_bits(a)
        bf = Float64.from_bits(b)
        z = op(af, bf)
        expected_responses.append(z.get_bits())
        #print (af, bf, z)
        actual = yield from get_case(dut, a, b)
        actual_responses.append(actual)

    if len(actual_responses) < len(expected_responses):
        print ("Fail ... not enough results")
        # BUG FIX: was exit(0), which reported success on a failure.
        sys.exit(1)

    for exp, act, a, b in zip(expected_responses, actual_responses,
                              stimulus_a, stimulus_b):
        if match(exp, act):
            continue

        print ("Fail ... expected:", hex(exp), "actual:", hex(act))
        # Field-by-field breakdown via the shared helpers (the original
        # duplicated the bit-twiddling inline for each value).
        for label, val in (("a", a), ("b", b),
                           ("expected", exp), ("actual", act)):
            print (hex(val))
            print (label, "mantissa:", get_mantissa(val))
            print (label, "exponent:", get_exponent(val))
            print (label, "sign:", get_sign(val))

        # BUG FIX: was sys.exit(0); a failing test must not exit 0.
        sys.exit(1)
+
+
def run_corner_cases(dut, count, op):
    """All ordered pairs of six special values: +/-0, +/-qNaN, +/-inf."""
    from itertools import permutations
    corners = [0x8000000000000000,   # -0.0
               0x0000000000000000,   # +0.0
               0x7ff8000000000000,   # +qNaN
               0xfff8000000000000,   # -qNaN
               0x7ff0000000000000,   # +inf
               0xfff0000000000000]   # -inf
    pairs = list(permutations(corners, 2))
    stimulus_a = [a for a, b in pairs]
    stimulus_b = [b for a, b in pairs]
    yield from run_test(dut, stimulus_a, stimulus_b, op)
    count += len(stimulus_a)
    print (count, "vectors passed")
+
+
def run_edge_cases(dut, count, op):
    """Pair each special value with random 64-bit patterns, on both the
    a-side and the b-side, then hammer with fully random pairs.

    BUG FIX: the random bounds were randint(0, 1<<64); randint's upper
    bound is inclusive, so that could emit a 65-bit value.  Use
    (1<<64)-1, matching the 16-bit variant of this helper.
    """
    maxint64 = (1 << 64) - 1
    # (zero pair), (qNaN pair), (infinity pair) -- each sign in turn
    specials = ((0x8000000000000000, 0x0000000000000000),
                (0x7FF8000000000000, 0xFFF8000000000000),
                (0x7FF0000000000000, 0xFFF0000000000000))
    for pair in specials:
        for fixed in pair:
            # fixed operand on the a side, random on the b side
            stimulus_a = [fixed for i in range(1000)]
            stimulus_b = [randint(0, maxint64) for i in range(1000)]
            yield from run_test(dut, stimulus_a, stimulus_b, op)
            count += len(stimulus_a)
            print (count, "vectors passed")
        for fixed in pair:
            # fixed operand on the b side, random on the a side
            stimulus_b = [fixed for i in range(1000)]
            stimulus_a = [randint(0, maxint64) for i in range(1000)]
            yield from run_test(dut, stimulus_a, stimulus_b, op)
            count += len(stimulus_a)
            print (count, "vectors passed")

    #seed(0)
    # finally, fully-random operand pairs
    for i in range(100000):
        stimulus_a = [randint(0, maxint64) for i in range(1000)]
        stimulus_b = [randint(0, maxint64) for i in range(1000)]
        yield from run_test(dut, stimulus_a, stimulus_b, op)
        count += 1000
        print (count, "random vectors passed")
+
--- /dev/null
+from random import randint
+from random import seed
+
+import sys
+from sfpy import Float16
+
def get_mantissa(x):
    """Low 10 bits: the fraction field of an IEEE-754 binary16 value."""
    return x & 0x3ff
+
def get_exponent(x):
    """Unbiased exponent of an IEEE-754 binary16 value.

    BUG FIX: the original used mask 0xf800 and shift 11, which grabs
    the sign bit and drops the exponent LSB (it made is_inf(0x7c00),
    i.e. +inf, return False).  binary16 is 1 sign bit, 5 exponent bits
    (bits 14..10, bias 15) and 10 fraction bits.
    """
    return ((x & 0x7c00) >> 10) - 15
+
def get_sign(x):
    """Sign bit (bit 15): 0 for positive, 1 for negative."""
    return (x >> 15) & 1
+
def is_nan(x):
    """NaN: maximum exponent (16 after unbiasing) with nonzero fraction."""
    if get_exponent(x) != 16:
        return False
    return get_mantissa(x) != 0
+
def is_inf(x):
    """Infinity: maximum exponent (16 after unbiasing) with zero fraction."""
    if get_exponent(x) != 16:
        return False
    return get_mantissa(x) == 0
+
def is_pos_inf(x):
    """Positive infinity: infinity with the sign bit clear."""
    if not is_inf(x):
        return False
    return not get_sign(x)
+
def is_neg_inf(x):
    """Negative infinity: infinity with the sign bit set."""
    if not is_inf(x):
        return False
    return get_sign(x)
+
def match(x, y):
    """True when x and y are equivalent FP results: bit-equal, the same
    infinity, or both NaN (any payload)."""
    if x == y:
        return True
    if is_nan(x) and is_nan(y):
        return True
    if is_pos_inf(x) and is_pos_inf(y):
        return True
    return is_neg_inf(x) and is_neg_inf(y)
+
def get_case(dut, a, b):
    """Generator: run one operation through the stb/ack interface.

    Presents operands `a` and `b`, waits for out_z.stb, completes the
    out_z ack handshake, and returns the result bits.
    """
    yield dut.in_a.v.eq(a)
    yield dut.in_a.stb.eq(1)
    yield
    yield
    a_ack = (yield dut.in_a.ack)
    assert a_ack == 0
    yield dut.in_b.v.eq(b)
    yield dut.in_b.stb.eq(1)
    b_ack = (yield dut.in_b.ack)
    assert b_ack == 0

    # wait for the result strobe, then complete the output handshake
    while True:
        yield
        out_z_stb = (yield dut.out_z.stb)
        if not out_z_stb:
            continue
        yield dut.in_a.stb.eq(0)
        yield dut.in_b.stb.eq(0)
        yield dut.out_z.ack.eq(1)
        yield
        yield dut.out_z.ack.eq(0)
        yield
        yield
        break

    out_z = yield dut.out_z.v
    return out_z
+
def check_case(dut, a, b, z):
    """Run one (a, b) pair and assert the DUT result equals z exactly."""
    result = yield from get_case(dut, a, b)
    assert result == z, "Output z 0x%x not equal to expected 0x%x" % (result, z)
+
+
def run_test(dut, stimulus_a, stimulus_b, op):
    """Feed paired 16-bit stimuli through the DUT and compare each
    result against sfpy's Float16 reference for `op`.

    On mismatch, prints a field-by-field breakdown of both operands and
    of the expected and actual results, then aborts the simulation.
    """
    expected_responses = []
    actual_responses = []
    for a, b in zip(stimulus_a, stimulus_b):
        af = Float16.from_bits(a)
        bf = Float16.from_bits(b)
        z = op(af, bf)
        expected_responses.append(z.get_bits())
        #print (af, bf, z)
        actual = yield from get_case(dut, a, b)
        actual_responses.append(actual)

    if len(actual_responses) < len(expected_responses):
        print ("Fail ... not enough results")
        # BUG FIX: was exit(0), which reported success on a failure.
        sys.exit(1)

    for expected, actual, a, b in zip(expected_responses, actual_responses,
                                      stimulus_a, stimulus_b):
        if match(expected, actual):
            continue

        print ("Fail ... expected:", hex(expected), "actual:", hex(actual))

        # field-by-field breakdown of all four values
        for label, val in (("a", a), ("b", b),
                           ("expected", expected), ("actual", actual)):
            print (hex(val))
            print (label, "mantissa:", get_mantissa(val))
            print (label, "exponent:", get_exponent(val))
            print (label, "sign:", get_sign(val))

        # BUG FIX: was sys.exit(0); a failing test must not exit 0.
        sys.exit(1)
+
def run_corner_cases(dut, count, op):
    #corner cases
    # NOTE(review): under the standard binary16 layout 0x7c00/0xfc00 are
    # +/-inf, but 0x7800/0xf800 are large *normals*, not NaNs -- the
    # parallel 64-bit helper uses qNaN bit patterns in these slots;
    # confirm whether 0x7e00/0xfe00 were intended here.
    corners = [0x8000, 0x0000, 0x7800, 0xf800, 0x7c00, 0xfc00]
    from itertools import permutations
    stimulus_a = [i[0] for i in permutations(corners, 2)]
    stimulus_b = [i[1] for i in permutations(corners, 2)]
    yield from run_test(dut, stimulus_a, stimulus_b, op)
    count += len(stimulus_a)
    print (count, "vectors passed")
+
+
def run_edge_cases(dut, count, op):
    """Pin one operand to each FP16 edge bit-pattern while sweeping the
    other operand with random 16-bit words, then run a long random soak.

    The original body was twelve copy-pasted stanzas; they are folded
    into a data-driven schedule that preserves the original order of
    (pattern, which-operand-is-fixed) pairs exactly.

    Returns the updated vector count.
    """
    maxint16 = 1 << 16
    maxcount = 10

    def _rand_batch():
        # One batch of random 16-bit stimulus words.
        return [randint(0, maxint16 - 1) for i in range(maxcount)]

    def _fixed(value):
        # One batch of the same fixed edge pattern.
        return [value for i in range(maxcount)]

    # (value, fix_a): fix_a=True pins stimulus_a, else stimulus_b.
    # Order matches the original twelve stanzas.
    schedule = [
        (0x8000, True), (0x0000, True), (0x8000, False), (0x0000, False),
        (0x7800, True), (0xF800, True), (0x7800, False), (0xF800, False),
        (0x7C00, True), (0xFC00, True), (0x7C00, False), (0xFC00, False),
    ]
    for value, fix_a in schedule:
        if fix_a:
            stimulus_a, stimulus_b = _fixed(value), _rand_batch()
        else:
            stimulus_a, stimulus_b = _rand_batch(), _fixed(value)
        yield from run_test(dut, stimulus_a, stimulus_b, op)
        count += maxcount
        print(count, "vectors passed")

    # Long random soak; uncomment seed(0) upstream for reproducibility.
    #seed(0)
    for i in range(100000):
        stimulus_a = _rand_batch()
        stimulus_b = _rand_batch()
        yield from run_test(dut, stimulus_a, stimulus_b, op)
        count += maxcount
        print(count, "random vectors passed")
    return count
+
--- /dev/null
+from random import randint
+from random import seed
+
+import sys
+from sfpy import Float32
+
def get_mantissa(x):
    """Return the low 23 mantissa bits of the FP32 word *x*."""
    return x & 0x7fffff
+
def get_exponent(x):
    """Return the unbiased FP32 exponent of *x* (range -127 .. 128)."""
    biased = (x & 0x7f800000) >> 23
    return biased - 127
+
def set_exponent(x, e):
    """Return *x* with its FP32 exponent field replaced by the unbiased
    exponent *e* (re-biased by +127); sign and mantissa are preserved."""
    cleared = x & ~0x7f800000      # drop the old exponent bits
    return cleared | ((e + 127) << 23)
+
def get_sign(x):
    """Return 1 when the FP32 sign bit of *x* is set, else 0."""
    return 1 if x & 0x80000000 else 0
+
def is_nan(x):
    """True when *x* encodes an FP32 NaN: max exponent, nonzero mantissa."""
    if get_exponent(x) != 128:
        return False
    return get_mantissa(x) != 0
+
def is_inf(x):
    """True when *x* encodes FP32 infinity: max exponent, zero mantissa."""
    if get_exponent(x) != 128:
        return False
    return get_mantissa(x) == 0
+
def is_pos_inf(x):
    """True for +inf: an infinity encoding with the sign bit clear."""
    if not is_inf(x):
        return False
    return not get_sign(x)
+
def is_neg_inf(x):
    """Truthy for -inf: an infinity encoding with the sign bit set."""
    if not is_inf(x):
        return False
    return get_sign(x)
+
def match(x, y):
    """Loose FP32 comparison: exact bit equality, or both operands are
    the same infinity, or both are NaN (any payload)."""
    if is_pos_inf(x) and is_pos_inf(y):
        return True
    if is_neg_inf(x) and is_neg_inf(y):
        return True
    if is_nan(x) and is_nan(y):
        return True
    return x == y
+
def get_rs_case(dut, a, b, mid):
    """Feed operands *a* and *b* into reservation-station row 0 of *dut*,
    tagged with *mid*, wait for a result on result row 0, and return
    (result_bits, mid).

    Handshake: each operand is presented with valid_i raised, held for a
    few cycles, then valid_i is dropped; the result is collected by
    raising ready_i and polling valid_o.
    """
    in_a, in_b = dut.rs[0]
    out_z = dut.res[0]
    yield dut.ids.in_mid.eq(mid)
    yield in_a.v.eq(a)
    yield in_a.valid_i.eq(1)
    # allow a few clock cycles for the operand to be accepted
    yield
    yield
    yield
    yield
    a_ack = (yield in_a.ready_o)
    # NOTE(review): asserts ready_o is LOW after presenting data — confirm
    # the intended handshake polarity (reads as "operand taken, row busy")
    assert a_ack == 0

    yield in_a.valid_i.eq(0)

    yield in_b.v.eq(b)
    yield in_b.valid_i.eq(1)
    yield
    yield
    b_ack = (yield in_b.ready_o)
    assert b_ack == 0

    yield in_b.valid_i.eq(0)

    # signal readiness to accept the result
    yield out_z.ready_i.eq(1)

    # poll (one clock per iteration) until the result is valid
    while True:
        out_z_stb = (yield out_z.valid_o)
        if not out_z_stb:
            yield
            continue
        vout_z = yield out_z.v
        #out_mid = yield dut.ids.out_mid
        yield out_z.ready_i.eq(0)
        yield
        break

    # mid is echoed back unchanged (out_mid readback is disabled above)
    return vout_z, mid
+
def check_rs_case(dut, a, b, z, mid=None):
    """Run one vector through get_rs_case and assert the result equals
    *z* and the mid tag is echoed back unchanged.
    """
    if mid is None:
        mid = randint(0, 6)
    # NOTE(review): unconditional override — discards the randint above and
    # pins mid to 0.  Looks like leftover debugging of mid routing; confirm
    # before relying on random-mid coverage.
    mid = 0
    out_z, out_mid = yield from get_rs_case(dut, a, b, mid)
    assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
    assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid)
+
+
def get_case(dut, a, b, mid):
    """Run one (a, b) vector through the DUT's direct in_a/in_b/out_z
    interface and return (result_bits, mid).

    Same valid/ready handshake as get_rs_case: present each operand with
    valid_i raised, drop it, then raise ready_i and poll valid_o for the
    result.
    """
    #yield dut.in_mid.eq(mid)
    yield dut.in_a.v.eq(a)
    # BUGFIX: was ``dut.in_a.valid_i_test.eq(1)`` — inconsistent with the
    # matching ``valid_i.eq(0)`` below and with get_rs_case; the strobe
    # must be asserted on valid_i.
    yield dut.in_a.valid_i.eq(1)
    # allow a few clock cycles for the operand to be accepted
    yield
    yield
    yield
    yield
    a_ack = (yield dut.in_a.ready_o)
    # NOTE(review): expects ready_o LOW here — confirm handshake polarity
    assert a_ack == 0

    yield dut.in_a.valid_i.eq(0)

    yield dut.in_b.v.eq(b)
    yield dut.in_b.valid_i.eq(1)
    yield
    yield
    b_ack = (yield dut.in_b.ready_o)
    assert b_ack == 0

    yield dut.in_b.valid_i.eq(0)

    # signal readiness to accept the result
    yield dut.out_z.ready_i.eq(1)

    # poll (one clock per iteration) until the result is valid
    while True:
        out_z_stb = (yield dut.out_z.valid_o)
        if not out_z_stb:
            yield
            continue
        out_z = yield dut.out_z.v
        #out_mid = yield dut.out_mid
        yield dut.out_z.ready_i.eq(0)
        yield
        break

    return out_z, mid  # TODO: mid
+
def check_case(dut, a, b, z, mid=None):
    """Run one vector through get_case and assert the result equals *z*
    and the mid tag is echoed back unchanged.
    """
    if mid is None:
        mid = randint(0, 6)
    # NOTE(review): unconditional override — discards the randint above and
    # pins mid to 0, mirroring check_rs_case.  Looks like leftover
    # debugging of mid routing; confirm before relying on it.
    mid = 0
    out_z, out_mid = yield from get_case(dut, a, b, mid)
    assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
    assert out_mid == mid, "Output mid 0x%x != expected 0x%x" % (out_mid, mid)
+
+
def run_test(dut, stimulus_a, stimulus_b, op, get_case_fn):
    """Drive each (a, b) FP32 stimulus pair through the DUT using
    *get_case_fn* and compare against the sfpy Float32 reference for
    *op*; also checks that the mid tag is routed through unchanged.

    On any mismatch, dump the operands and both results field-by-field
    and exit with a NON-ZERO status.  The original code called
    exit(0)/sys.exit(0), hiding failures from the shell/CI, and inlined
    the bit-field arithmetic instead of using the get_* helpers defined
    in this file.
    """

    def _dump(label, bits):
        # Decompose one FP32 word via the module's field helpers.
        print(hex(bits))
        print(label, "mantissa:", get_mantissa(bits))
        print(label, "exponent:", get_exponent(bits))
        print(label, "sign:", get_sign(bits))

    expected_responses = []
    actual_responses = []
    for a, b in zip(stimulus_a, stimulus_b):
        mid = randint(0, 6)
        mid = 0  # NOTE(review): mid routing pinned to 0 — leftover debug?
        af = Float32.from_bits(a)
        bf = Float32.from_bits(b)
        z = op(af, bf)
        expected_responses.append((z.get_bits(), mid))
        actual = yield from get_case_fn(dut, a, b, mid)
        actual_responses.append(actual)

    if len(actual_responses) < len(expected_responses):
        print("Fail ... not enough results")
        sys.exit(1)  # BUGFIX: was exit(0) — failure must exit non-zero

    for expected, actual, a, b in zip(expected_responses, actual_responses,
                                      stimulus_a, stimulus_b):
        if expected[1] != actual[1]:  # check mid routing
            print("MID failed", expected[1], actual[1])
            sys.exit(1)  # BUGFIX: was sys.exit(0)

        if not match(expected[0], actual[0]):
            expected = expected[0]
            actual = actual[0]
            print("Fail ... expected:", hex(expected), "actual:", hex(actual))
            _dump("a", a)
            _dump("b", b)
            _dump("expected", expected)
            _dump("actual", actual)
            sys.exit(1)  # BUGFIX: was sys.exit(0)
+
# FP32 corner bit-patterns shared by run_corner_cases / run_edge_cases:
# -0.0, +0.0, +inf, -inf, then two max-exponent nonzero-mantissa (NaN)
# encodings with each sign.
corner_cases = [0x80000000, 0x00000000, 0x7f800000, 0xff800000,
                0x7fc00000, 0xffc00000]
+
def run_corner_cases(dut, count, op, get_case_fn):
    """Run every ordered pair of FP32 corner patterns through the DUT.

    Returns the updated vector count.  BUGFIX: ``count`` was previously
    incremented locally and discarded; returning it makes this helper
    consistent with run_cases/run_edge_cases, whose callers accumulate
    the returned count.
    """
    from itertools import permutations
    stimulus_a = [i[0] for i in permutations(corner_cases, 2)]
    stimulus_b = [i[1] for i in permutations(corner_cases, 2)]
    yield from run_test(dut, stimulus_a, stimulus_b, op, get_case_fn)
    count += len(stimulus_a)
    print(count, "vectors passed")
    return count
+
def run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn):
    """Run the stimulus through the DUT both ways round: first (a, b),
    then with the operands swapped (b, a)."""
    orderings = ((stimulus_a, stimulus_b), (stimulus_b, stimulus_a))
    for first, second in orderings:
        yield from run_test(dut, first, second, op, get_case_fn)
+
def run_cases(dut, count, op, fixed_num, num_entries, get_case_fn):
    """Pair one fixed stimulus (a single bit-pattern, or a pre-built
    list) against several families of random operands, running each
    family in both operand orders via run_test_2.

    Families: plain random 32-bit words, then random words with the
    exponent forced to 128 (non-canonical NaN/inf space), -127, -126
    (nearly zero) and 127 (nearly inf).

    Returns the updated vector count.
    """
    if isinstance(fixed_num, int):
        stimulus_a = [fixed_num for i in range(num_entries)]
        report = hex(fixed_num)
    else:
        stimulus_a = fixed_num
        report = "random"

    def _rand32():
        # BUGFIX: random.randint's upper bound is INCLUSIVE, so the
        # original randint(0, 1<<32) could yield the 33-bit value 2**32.
        return randint(0, (1 << 32) - 1)

    # plain random 32-bit words
    stimulus_b = [_rand32() for i in range(num_entries)]
    yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
    count += len(stimulus_a)
    print(count, "vectors passed 2^32", report)

    # random words with the exponent field forced to special values
    sweeps = [
        (128, "Non-Canonical NaN"),  # max exponent: NaN/inf encodings
        (-127, "exp=-127"),          # zero exponent field
        (-126, "exp=-126"),          # nearly zero
        (127, "exp=127"),            # nearly inf
    ]
    for exponent, label in sweeps:
        stimulus_b = [set_exponent(_rand32(), exponent)
                      for i in range(num_entries)]
        yield from run_test_2(dut, stimulus_a, stimulus_b, op, get_case_fn)
        count += len(stimulus_a)
        print(count, "vectors passed", label, report)

    return count
+
def run_edge_cases(dut, count, op, get_case_fn):
    """Run every FP32 corner pattern against random operands via
    run_cases, then a long all-random soak.

    Returns the updated vector count.
    """
    # each corner pattern against random operands (both orderings)
    for testme in corner_cases:
        count = yield from run_cases(dut, count, op, testme, 10, get_case_fn)

    # random soak
    for i in range(100000):
        # BUGFIX: randint's bound is inclusive — (1<<32) is not a valid
        # 32-bit value; clamp to (1<<32)-1.
        stimulus_a = [randint(0, (1 << 32) - 1) for i in range(10)]
        count = yield from run_cases(dut, count, op, stimulus_a, 10,
                                     get_case_fn)
    return count
+