leave off number being subtracted from "ready_o" calculation

[soc.git] / src / scoreboard / instruction_q.py
diff --git a/src/scoreboard/instruction_q.py b/src/scoreboard/instruction_q.py

index 94056e537ec7307e386d1638f7d2d3ff1a95fc54..f32a91acf79d72e518f62a06e41d34d06092d3c0 100644 (file)
--- a/src/scoreboard/instruction_q.py
+++ b/src/scoreboard/instruction_q.py
@@ -1,8 +1,10 @@
+from math import log
+
  from nmigen.compat.sim import run_simulation
  from nmigen.cli import verilog, rtlil
  from nmigen import Module, Signal, Cat, Array, Const, Repl, Elaboratable
  from nmutil.iocontrol import RecordObject
-from nmutil.nmoperator import eq
+from nmutil.nmoperator import eq, shape, cat
  
  
  class Instruction(RecordObject):
@@ -24,9 +26,11 @@ class Instruction(RecordObject):
  class InstructionQ(Elaboratable):
      """ contains a queue of (part-decoded) instructions.
  
-        it is expected that the user of this queue will simply
-        inspect the queue contents directly, indicating at the start
-        of each clock cycle how many need to be removed.
+        output is copied combinatorially from the front of the queue,
+        for easy access on the clock cycle.  only "n_in" instructions
+        are made available this way
+
+        input and shifting occurs on sync.
      """
      def __init__(self, wid, opwid, iqlen, n_in, n_out):
          """ constructor
@@ -38,63 +42,91 @@ class InstructionQ(Elaboratable):
              * :iqlen:       instruction queue length
              * :n_in:        max number of instructions allowed "in"
          """
+        self.iqlen = iqlen
          self.reg_width = wid
          self.opwid = opwid
          self.n_in = n_in
          self.n_out = n_out
+        mqbits = (int(log(iqlen) / log(2))+2, False)
  
-        self.q = Instruction.nq(iqlen, "i", wid, opwid)
-        self.qlen_o = Signal(max=iqlen)
-
-        self.p_add_i = Signal(max=n_in) # instructions to add (from data_i)
+        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
          self.p_ready_o = Signal() # instructions were added
          self.data_i = Instruction.nq(n_in, "data_i", wid, opwid)
          
          self.data_o = Instruction.nq(n_out, "data_o", wid, opwid)
-        self.n_sub_i = Signal(max=n_out) # number of instructions to remove
-        self.n_sub_o = Signal(max=n_out) # number of instructions removed
+        self.n_sub_i = Signal(mqbits) # number of instructions to remove
+        self.n_sub_o = Signal(mqbits) # number of instructions removed
+
+        self.qsz = shape(self.data_o[0])[0]
+        q = []
+        for i in range(iqlen):
+            q.append(Signal(self.qsz, name="q%d" % i))
+        self.q = Array(q)
+        self.qlen_o = Signal(mqbits)
  
      def elaborate(self, platform):
          m = Module()
          comb = m.d.comb
          sync = m.d.sync
  
-        iqlen = len(self.q)
-        mqlen = Const(iqlen, iqlen*2)
+        iqlen = self.iqlen
+        mqbits = int(log(iqlen) / log(2))
+
+        left = Signal((mqbits+2, False))
+        spare = Signal((mqbits+2, False))
+        qmaxed = Signal()
  
-        start_copy = Signal(max=iqlen*2)
-        end_copy = Signal(max=iqlen*2)
+        start_q = Signal(mqbits)
+        end_q = Signal(mqbits)
+        mqlen = Const(iqlen, (len(left), False))
+        print ("mqlen", mqlen)
  
          # work out how many can be subtracted from the queue
-        with m.If(self.n_sub_i >= self.qlen_o):
-            comb += self.n_sub_o.eq(self.qlen_o)
-        with m.Elif(self.n_sub_i):
-            comb += self.n_sub_o.eq(self.n_sub_i)
+        with m.If(self.n_sub_i):
+            qinmax = Signal()
+            comb += qinmax.eq(self.n_sub_i > self.qlen_o)
+            with m.If(qinmax):
+                comb += self.n_sub_o.eq(self.qlen_o)
+            with m.Else():
+                comb += self.n_sub_o.eq(self.n_sub_i)
+
+        # work out how many new items are going to be in the queue
+        comb += left.eq(self.qlen_o )#- self.n_sub_o)
+        comb += spare.eq(mqlen - self.p_add_i)
+        comb += qmaxed.eq(left <= spare)
+        comb += self.p_ready_o.eq(qmaxed & (self.p_add_i != 0))
+
+        # put q (flattened) into output
+        for i in range(self.n_out):
+            opos = Signal(mqbits)
+            comb += opos.eq(end_q + i - self.n_out) # end hasn't moved yet
+            comb += cat(self.data_o[i]).eq(self.q[opos])
+
+        with m.If(self.n_sub_o):
+            # ok now the end's moved
+            sync += end_q.eq(end_q + self.n_sub_o)
  
-        # work out the start and end of where data can be written
-        comb += start_copy.eq(self.qlen_o - self.n_sub_o)
-        comb += end_copy.eq(start_copy + self.p_add_i - 1)
-        comb += self.p_ready_o.eq(end_copy < self.qlen_o) # ready if room exists
+        with m.If(self.p_ready_o):
+            # copy in the input... insanely gate-costly... *sigh*...
+            for i in range(self.n_in):
+                with m.If(self.p_add_i > Const(i, len(self.p_add_i))):
+                    ipos = Signal(mqbits)
+                    comb += ipos.eq(start_q + i) # should roll round
+                    sync += self.q[ipos].eq(cat(self.data_i[i]))
+            sync += start_q.eq(start_q + self.p_add_i)
  
-        # this is going to be _so_ expensive in terms of gates... *sigh*...
          with m.If(self.p_ready_o):
-            for i in range(iqlen):
-                cfrom = Signal(max=iqlen*2)
-                cto = Signal(max=iqlen*2)
-                comb += cfrom.eq(Const(i, iqlen+1) + start_copy)
-                comb += cto.eq(Const(i, iqlen+1) + end_copy)
-                with m.If((cfrom < mqlen) & (cto < mqlen)):
-                    sync += self.q[cto].oper_i.eq(self.q[cfrom].oper_i)
-                    sync += self.q[cto].dest_i.eq(self.q[cfrom].dest_i)
-                    sync += self.q[cto].src1_i.eq(self.q[cfrom].src1_i)
-                    sync += self.q[cto].src2_i.eq(self.q[cfrom].src2_i)
+            # update the queue length
+            add2 = Signal(mqbits+1)
+            comb += add2.eq(self.qlen_o + self.p_add_i)
+            sync += self.qlen_o.eq(add2 - self.n_sub_o)
+        with m.Else():
+            sync += self.qlen_o.eq(self.qlen_o - self.n_sub_o)
  
          return m
  
      def __iter__(self):
-        for o in self.q:
-            yield from list(o)
-        yield self.qlen_o
+        yield from self.q
  
          yield self.p_ready_o
          for o in self.data_i: