+from math import log
+
from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Signal, Cat, Array, Const, Repl, Elaboratable
from nmutil.iocontrol import RecordObject
-from nmutil.nmoperator import eq
+from nmutil.nmoperator import eq, shape, cat
class Instruction(RecordObject):
class InstructionQ(Elaboratable):
""" contains a queue of (part-decoded) instructions.
- it is expected that the user of this queue will simply
- inspect the queue contents directly, indicating at the start
- of each clock cycle how many need to be removed.
+ output is copied combinatorially from the front of the queue,
+ for easy access on the clock cycle. only "n_out" instructions
+ are made available this way
+
+ input and shifting occurs on sync.
"""
def __init__(self, wid, opwid, iqlen, n_in, n_out):
""" constructor
* :iqlen: instruction queue length
* :n_in: max number of instructions allowed "in"
"""
# Creates the queue's input/output ports and its storage signals.
# Parameters not covered by the docstring above:
# * wid, opwid: stored as reg_width / opwid and forwarded unchanged to
#   Instruction.nq() (presumably register and opcode widths -- TODO confirm)
# * n_in: number of entries created in data_i
# * n_out: number of entries created in data_o
+ self.iqlen = iqlen
self.reg_width = wid
self.opwid = opwid
self.n_in = n_in
self.n_out = n_out
# Shape handed to Signal() for every counter below: floor(log2(iqlen))+2
# bits wide; the False is presumably the "signed" flag of an nmigen
# (width, signed) shape tuple -- confirm against the nmigen version in use.
# NOTE(review): the width is derived with floating-point log(); an integer
# form such as iqlen.bit_length() would avoid any rounding concern.
# NOTE(review): elaborate() reuses the name "mqbits" for a plain int without
# the +2, so the two values differ in both type and magnitude.
+ mqbits = (int(log(iqlen) / log(2))+2, False)
- self.q = Instruction.nq(iqlen, "i", wid, opwid)
- self.qlen_o = Signal(max=iqlen)
-
- self.p_add_i = Signal(max=n_in) # instructions to add (from data_i)
+ self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
self.p_ready_o = Signal() # instructions were added
self.data_i = Instruction.nq(n_in, "data_i", wid, opwid)
self.data_o = Instruction.nq(n_out, "data_o", wid, opwid)
- self.n_sub_i = Signal(max=n_out) # number of instructions to remove
- self.n_sub_o = Signal(max=n_out) # number of instructions removed
+ self.n_sub_i = Signal(mqbits) # number of instructions to remove
+ self.n_sub_o = Signal(mqbits) # number of instructions removed
+
# qsz is the first element of shape(data_o[0]); presumably the flattened
# bit-width of one Instruction record -- confirm against nmutil.nmoperator.
+ self.qsz = shape(self.data_o[0])[0]
# Queue storage: iqlen signals named q0..q<iqlen-1>, each qsz bits wide,
# wrapped in an Array because elaborate() indexes self.q with signals.
+ q = []
+ for i in range(iqlen):
+ q.append(Signal(self.qsz, name="q%d" % i))
+ self.q = Array(q)
# qlen_o is the entry counter that elaborate() updates on the sync domain.
+ self.qlen_o = Signal(mqbits)
def elaborate(self, platform):
# Builds and returns the Module implementing the queue: outputs and the
# add/remove bookkeeping are combinatorial, while queue writes, pointer
# updates and the length counter are registered on the sync domain.
# "platform" is not referenced anywhere in the visible body.
# NOTE(review): indentation is not visible in this view, so the nesting
# described in the comments below is inferred from statement order.
m = Module()
comb = m.d.comb
sync = m.d.sync
- iqlen = len(self.q)
- mqlen = Const(iqlen, iqlen*2)
+ iqlen = self.iqlen
# Here mqbits is a plain int, floor(log2(iqlen)), computed via float log().
+ mqbits = int(log(iqlen) / log(2))
+
# left / spare / qmaxed are scratch signals for the "is there room" check.
+ left = Signal((mqbits+2, False))
+ spare = Signal((mqbits+2, False))
+ qmaxed = Signal()
- start_copy = Signal(max=iqlen*2)
- end_copy = Signal(max=iqlen*2)
# start_q is the base index for writes into self.q; end_q is the base used
# when reading self.q out to data_o. Both are only mqbits wide.
# NOTE(review): when iqlen is not a power of two these pointers cannot
# address every entry of self.q and wrap at 2**mqbits rather than at iqlen
# -- confirm iqlen is restricted to powers of two.
+ start_q = Signal(mqbits)
+ end_q = Signal(mqbits)
# mqlen: the queue capacity as a constant with the same width as "left".
+ mqlen = Const(iqlen, (len(left), False))
# NOTE(review): this print runs on every elaborate() call; it looks like
# leftover debug output.
+ print ("mqlen", mqlen)
# work out how many can be subtracted from the queue
- with m.If(self.n_sub_i >= self.qlen_o):
- comb += self.n_sub_o.eq(self.qlen_o)
- with m.Elif(self.n_sub_i):
- comb += self.n_sub_o.eq(self.n_sub_i)
# New form: when n_sub_i is non-zero, n_sub_o is n_sub_i limited to qlen_o
# (qinmax flags a request larger than the current length). No assignment
# to n_sub_o is made when n_sub_i is zero.
+ with m.If(self.n_sub_i):
+ qinmax = Signal()
+ comb += qinmax.eq(self.n_sub_i > self.qlen_o)
+ with m.If(qinmax):
+ comb += self.n_sub_o.eq(self.qlen_o)
+ with m.Else():
+ comb += self.n_sub_o.eq(self.n_sub_i)
+
+ # work out how many new items are going to be in the queue
# NOTE(review): the "- self.n_sub_o" term is commented out, so entries
# being removed this cycle are not counted as free space.
+ comb += left.eq(self.qlen_o )#- self.n_sub_o)
# spare = capacity - requested additions; qmaxed is therefore true when
# qlen_o + p_add_i <= iqlen, i.e. when the additions fit.
# NOTE(review): the name "qmaxed" reads as the opposite of what it computes.
+ comb += spare.eq(mqlen - self.p_add_i)
+ comb += qmaxed.eq(left <= spare)
# Ready is asserted only when the additions fit and at least one is requested.
+ comb += self.p_ready_o.eq(qmaxed & (self.p_add_i != 0))
+
+ # put q (flattened) into output
# Each of the n_out output records is driven from q[end_q + i - n_out];
# opos is mqbits wide, so the sum is truncated on assignment.
# NOTE(review): confirm the "- self.n_out" offset is intended.
+ for i in range(self.n_out):
+ opos = Signal(mqbits)
+ comb += opos.eq(end_q + i - self.n_out) # end hasn't moved yet
+ comb += cat(self.data_o[i]).eq(self.q[opos])
+
# The read-side pointer advances by the number actually removed.
+ with m.If(self.n_sub_o):
+ # ok now the end's moved
+ sync += end_q.eq(end_q + self.n_sub_o)
- # work out the start and end of where data can be written
- comb += start_copy.eq(self.qlen_o - self.n_sub_o)
- comb += end_copy.eq(start_copy + self.p_add_i - 1)
- comb += self.p_ready_o.eq(end_copy < self.qlen_o) # ready if room exists
# When ready, input record i is flattened and stored at q[start_q + i],
# but only for i < p_add_i; start_q then advances by p_add_i.
+ with m.If(self.p_ready_o):
+ # copy in the input... insanely gate-costly... *sigh*...
+ for i in range(self.n_in):
+ with m.If(self.p_add_i > Const(i, len(self.p_add_i))):
# ipos is mqbits wide, so start_q + i is truncated on assignment.
+ ipos = Signal(mqbits)
+ comb += ipos.eq(start_q + i) # should roll round
+ sync += self.q[ipos].eq(cat(self.data_i[i]))
+ sync += start_q.eq(start_q + self.p_add_i)
- # this is going to be _so_ expensive in terms of gates... *sigh*...
# Length counter: add the accepted entries and subtract the removed ones
# when ready; otherwise only subtract the removed ones.
with m.If(self.p_ready_o):
- for i in range(iqlen):
- cfrom = Signal(max=iqlen*2)
- cto = Signal(max=iqlen*2)
- comb += cfrom.eq(Const(i, iqlen+1) + start_copy)
- comb += cto.eq(Const(i, iqlen+1) + end_copy)
- with m.If((cfrom < mqlen) & (cto < mqlen)):
- sync += self.q[cto].oper_i.eq(self.q[cfrom].oper_i)
- sync += self.q[cto].dest_i.eq(self.q[cfrom].dest_i)
- sync += self.q[cto].src1_i.eq(self.q[cfrom].src1_i)
- sync += self.q[cto].src2_i.eq(self.q[cfrom].src2_i)
+ # update the queue length
+ add2 = Signal(mqbits+1)
+ comb += add2.eq(self.qlen_o + self.p_add_i)
+ sync += self.qlen_o.eq(add2 - self.n_sub_o)
+ with m.Else():
+ sync += self.qlen_o.eq(self.qlen_o - self.n_sub_o)
return m
def __iter__(self):
- for o in self.q:
- yield from list(o)
- yield self.qlen_o
+ yield from self.q
yield self.p_ready_o
for o in self.data_i: