start adding in immediates into CompUnit ALU

[soc.git] / src / experiment / compalu.py
diff --git a/src/experiment/compalu.py b/src/experiment/compalu.py

index 06cd9e5763b05767fe4b11d3075ca918f70c90d8..c8815353f5199d5ccf1b3ca46837f4dca94472eb 100644 (file)
--- a/src/experiment/compalu.py
+++ b/src/experiment/compalu.py
@@ -1,6 +1,6 @@
  from nmigen.compat.sim import run_simulation
  from nmigen.cli import verilog, rtlil
  from nmigen.compat.sim import run_simulation
  from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable
+from nmigen import Module, Signal, Mux, Elaboratable
  
  from nmutil.latch import SRLatch, latchregister
  
  
  from nmutil.latch import SRLatch, latchregister
  
@@ -18,7 +18,7 @@ from nmutil.latch import SRLatch, latchregister
        The src1 and src2 registers and the operand can be latched in
        at this point
  
        The src1 and src2 registers and the operand can be latched in
        at this point
  
-    * Read request is set, which is ackowledged through the Scoreboard
+    * Read request is set, which is acknowledged through the Scoreboard
        to the priority picker, which generates (one and only one) Go_Read
        at a time.  One of those will (eventually) be this Computation Unit.
  
        to the priority picker, which generates (one and only one) Go_Read
        at a time.  One of those will (eventually) be this Computation Unit.
  
@@ -36,10 +36,16 @@ from nmutil.latch import SRLatch, latchregister
        register is placed combinatorially onto the output, and (2) the
        req_l latch is cleared, busy is dropped, and the Comp Unit is back
        through its revolving door to do another task.
        register is placed combinatorially onto the output, and (2) the
        req_l latch is cleared, busy is dropped, and the Comp Unit is back
        through its revolving door to do another task.
+
+    Notes on oper_i:
+
+    * bits[0:2] are for the ALU, add=0, sub=1, shift=2, mul=3
+    * bit[2] is the immediate flag (bit[2]=1 == immediate mode)
  """
  
  class ComputationUnitNoDelay(Elaboratable):
      def __init__(self, rwid, opwid, alu):
  """
  
  class ComputationUnitNoDelay(Elaboratable):
      def __init__(self, rwid, opwid, alu):
+        self.opwid = opwid
          self.rwid = rwid
          self.alu = alu
  
          self.rwid = rwid
          self.alu = alu
  
@@ -47,8 +53,11 @@ class ComputationUnitNoDelay(Elaboratable):
          self.go_rd_i = Signal(reset_less=True) # go read in
          self.go_wr_i = Signal(reset_less=True) # go write in
          self.issue_i = Signal(reset_less=True) # fn issue in
          self.go_rd_i = Signal(reset_less=True) # go read in
          self.go_wr_i = Signal(reset_less=True) # go write in
          self.issue_i = Signal(reset_less=True) # fn issue in
+        self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
+        self.go_die_i = Signal() # go die (reset)
  
          self.oper_i = Signal(opwid, reset_less=True) # opcode in
  
          self.oper_i = Signal(opwid, reset_less=True) # opcode in
+        self.imm_i = Signal(rwid, reset_less=True) # immediate in
          self.src1_i = Signal(rwid, reset_less=True) # oper1 in
          self.src2_i = Signal(rwid, reset_less=True) # oper2 in
  
          self.src1_i = Signal(rwid, reset_less=True) # oper1 in
          self.src2_i = Signal(rwid, reset_less=True) # oper2 in
  
@@ -64,62 +73,107 @@ class ComputationUnitNoDelay(Elaboratable):
          m.submodules.opc_l = opc_l = SRLatch(sync=False)
          m.submodules.req_l = req_l = SRLatch(sync=False)
  
          m.submodules.opc_l = opc_l = SRLatch(sync=False)
          m.submodules.req_l = req_l = SRLatch(sync=False)
  
+        # shadow/go_die
+        reset_w = Signal(reset_less=True)
+        reset_r = Signal(reset_less=True)
+        m.d.comb += reset_w.eq(self.go_wr_i | self.go_die_i)
+        m.d.comb += reset_r.eq(self.go_rd_i | self.go_die_i)
+
          # This is fascinating and very important to observe that this
          # is in effect a "3-way revolving door".  At no time may all 3
          # latches be set at the same time.
  
          # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
          m.d.sync += opc_l.s.eq(self.issue_i) # XXX NOTE: INVERTED FROM book!
          # This is fascinating and very important to observe that this
          # is in effect a "3-way revolving door".  At no time may all 3
          # latches be set at the same time.
  
          # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
          m.d.sync += opc_l.s.eq(self.issue_i) # XXX NOTE: INVERTED FROM book!
-        m.d.sync += opc_l.r.eq(self.go_wr_i) # XXX NOTE: INVERTED FROM book!
+        m.d.sync += opc_l.r.eq(reset_w)      # XXX NOTE: INVERTED FROM book!
  
          # src operand latch (not using go_wr_i)
          m.d.sync += src_l.s.eq(self.issue_i)
  
          # src operand latch (not using go_wr_i)
          m.d.sync += src_l.s.eq(self.issue_i)
-        m.d.sync += src_l.r.eq(self.go_rd_i)
+        m.d.sync += src_l.r.eq(reset_r)
  
          # dest operand latch (not using issue_i)
          m.d.sync += req_l.s.eq(self.go_rd_i)
  
          # dest operand latch (not using issue_i)
          m.d.sync += req_l.s.eq(self.go_rd_i)
-        m.d.sync += req_l.r.eq(self.go_wr_i)
+        m.d.sync += req_l.r.eq(reset_w)
+
+
+        # create a latch/register for the opcode (captured on issue)
+        oper_r = Signal(self.opwid+1, reset_less=True) # opcode reg
+        latchregister(m, self.oper_i, oper_r, self.issue_i)
+
+        # and one for the output from the ALU
+        data_r = Signal(self.rwid, reset_less=True) # Dest register
+        latchregister(m, self.alu.o, data_r, req_l.q)
+
+        # get the bottom 2 bits (bits[0:2]) for the ALU
+        m.d.comb += self.alu.op.eq(oper_r[0:2])
  
  
-        # XXX
-        # XXX NOTE: sync on req_rel_o and data_o due to simulation lock-up
-        # XXX
+        # 3rd bit is whether this is an immediate or not
+        op_is_imm = Signal(reset_less=True)
+        m.d.comb += op_is_imm.eq(oper_r[2])
  
  
+        # select immediate if opcode says so.  however also change the latch
+        # to trigger *from* the opcode latch instead.
+        src2_or_imm = Signal(self.rwid, reset_less=True)
+        src_sel = Signal(reset_less=True)
+        m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
+        m.d.comb += src2_or_imm.eq(Mux(op_is_imm, self.imm_i, self.src2_i))
+
+        # create a latch/register for src1/src2
+        latchregister(m, self.src1_i, self.alu.a, src_l.q)
+        latchregister(m, src2_or_imm, self.alu.b, src_sel)
+
+        # -----
          # outputs
          # outputs
-        m.d.comb += self.busy_o.eq(opc_l.q) # busy out
-        m.d.comb += self.rd_rel_o.eq(src_l.q & opc_l.q) # src1/src2 req rel
+        # -----
+
+        # all request signals gated by busy_o.  prevents picker problems
+        busy_o = self.busy_o
+        m.d.comb += busy_o.eq(opc_l.q) # busy out
+        m.d.comb += self.rd_rel_o.eq(src_l.q & busy_o) # src1/src2 req rel
  
  
+        # the counter is just for demo purposes, to get the ALUs of different
+        # types to take arbitrary completion times
          with m.If(opc_l.qn):
              m.d.sync += self.counter.eq(0)
          with m.If(opc_l.qn):
              m.d.sync += self.counter.eq(0)
-        with m.If(req_l.qn & opc_l.q & (self.counter == 0)):
-            with m.If(self.oper_i == 2): # MUL, to take 5 instructions
+        with m.If(req_l.qn & busy_o & (self.counter == 0)):
+            with m.If(self.alu.op == 2): # MUL, to take 5 instructions
                  m.d.sync += self.counter.eq(5)
                  m.d.sync += self.counter.eq(5)
-            with m.Elif(self.oper_i == 3): # SHIFT to take 7
+            with m.Elif(self.alu.op == 3): # SHIFT to take 7
                  m.d.sync += self.counter.eq(7)
                  m.d.sync += self.counter.eq(7)
+            with m.Elif(self.alu.op >= 4): # Branches take 6 (to test shadow)
+                m.d.sync += self.counter.eq(6)
              with m.Else(): # ADD/SUB to take 2
                  m.d.sync += self.counter.eq(2)
          with m.If(self.counter > 1):
              m.d.sync += self.counter.eq(self.counter - 1)
          with m.If(self.counter == 1):
              with m.Else(): # ADD/SUB to take 2
                  m.d.sync += self.counter.eq(2)
          with m.If(self.counter > 1):
              m.d.sync += self.counter.eq(self.counter - 1)
          with m.If(self.counter == 1):
-            m.d.comb += self.req_rel_o.eq(req_l.q & opc_l.q) # req release out
-
-        # create a latch/register for src1/src2
-        latchregister(m, self.src1_i, self.alu.a, src_l.q)
-        latchregister(m, self.src2_i, self.alu.b, src_l.q)
-        #with m.If(src_l.qn):
-        #    m.d.comb += self.alu.op.eq(self.oper_i)
-
-        # create a latch/register for the operand
-        latchregister(m, self.oper_i, self.alu.op, src_l.q)
-
-        # and one for the output from the ALU
-        data_r = Signal(self.rwid, reset_less=True) # Dest register
-        latchregister(m, self.alu.o, data_r, req_l.q)
+            # write req release out.  waits until shadow is dropped.
+            m.d.comb += self.req_rel_o.eq(req_l.q & busy_o & self.shadown_i)
  
          with m.If(self.go_wr_i):
              m.d.comb += self.data_o.eq(data_r)
  
          return m
  
  
          with m.If(self.go_wr_i):
              m.d.comb += self.data_o.eq(data_r)
  
          return m
  
+    def __iter__(self):
+        yield self.go_rd_i
+        yield self.go_wr_i
+        yield self.issue_i
+        yield self.shadown_i
+        yield self.go_die_i
+        yield self.oper_i
+        yield self.imm_i
+        yield self.src1_i
+        yield self.src2_i
+        yield self.busy_o
+        yield self.rd_rel_o
+        yield self.req_rel_o
+        yield self.data_o
+
+    def ports(self):
+        return list(self)
+
+
  def scoreboard_sim(dut):
      yield dut.dest_i.eq(1)
      yield dut.issue_i.eq(1)
  def scoreboard_sim(dut):
      yield dut.dest_i.eq(1)
      yield dut.issue_i.eq(1)
@@ -143,12 +197,14 @@ def scoreboard_sim(dut):
      yield
  
  def test_scoreboard():
      yield
  
  def test_scoreboard():
-    dut = Scoreboard(32, 8)
+    from alu_hier import ALU
+    alu = ALU(16)
+    dut = ComputationUnitNoDelay(16, 8, alu)
      vl = rtlil.convert(dut, ports=dut.ports())
      vl = rtlil.convert(dut, ports=dut.ports())
-    with open("test_scoreboard.il", "w") as f:
+    with open("test_compalu.il", "w") as f:
          f.write(vl)
  
          f.write(vl)
  
-    run_simulation(dut, scoreboard_sim(dut), vcd_name='test_scoreboard.vcd')
+    run_simulation(dut, scoreboard_sim(dut), vcd_name='test_compalu.vcd')
  
  if __name__ == '__main__':
      test_scoreboard()
  
  if __name__ == '__main__':
      test_scoreboard()