stack of changes to MultiCompUnit to speed it up
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 1 Dec 2021 19:47:27 +0000 (19:47 +0000)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 1 Dec 2021 19:47:27 +0000 (19:47 +0000)
A lot of comb settings were changed to sync as a hack-experiment
when TestIssuer (ls180) was being developed, to try to speed up ECP5
timing.  Those need reverting (back to comb) to reduce latency.

src/soc/experiment/compalu_multi.py
src/soc/fu/compunits/compunits.py
src/soc/simple/test/test_core.py

index e0931a03c5c55ec46f76e443a2890f72ad4deb64..f76e40660ac3181bda5e4af48ef8aba97f952964 100644 (file)
@@ -255,13 +255,15 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         m.d.sync += opc_l.s.eq(self.issue_i)       # set on issue
         m.d.sync += opc_l.r.eq(req_done)  # reset on ALU
 
-        # src operand latch (not using go_wr_i)
-        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src) & ~self.rdmaskn)
+        # src operand latch (not using go_wr_i) ANDed with rdmask
+        rdmaskn = Signal(self.n_src)
+        latchregister(m, self.rdmaskn, rdmaskn, self.issue_i, name="rdmask_l")
+        m.d.comb += src_l.s.eq(Repl(self.issue_i, self.n_src) & ~rdmaskn)
         m.d.sync += src_l.r.eq(reset_r)
 
         # dest operand latch (not using issue_i)
         rw_domain += req_l.s.eq(alu_pulsem & self.wrmask)
-        m.d.sync += req_l.r.eq(reset_w | prev_wr_go)
+        m.d.comb += req_l.r.eq(reset_w | prev_wr_go)
 
         # pass operation to the ALU (sync: plenty time to wait for src reads)
         op = self.get_op()
@@ -277,10 +279,10 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
             ok = Const(1, 1)
             data_r_ok = Const(1, 1)
             if isinstance(lro, Record):
+                print("wr fields", i, lro, lro.fields)
                 data_r = Record.like(lro, name=name)
-                print("wr fields", i, lro, data_r.fields)
                 # bye-bye abstract interface design..
-                fname = find_ok(data_r.fields)
+                fname = find_ok(lro.fields)
                 if fname:
                     ok = getattr(lro, fname)
                     data_r_ok = getattr(data_r, fname)
@@ -289,13 +291,13 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
                 # XXX fails - wrok.append((ok|data_r_ok) & self.busy_o)
                 wrok.append(ok & self.busy_o)
             else:
+                data_r = Signal.like(lro, name=name)
                 # really should retire this but it's part of unit tests
-                data_r = Signal.like(lro, name=name, reset_less=True)
                 wrok.append(ok & self.busy_o)
-            with m.If(alu_pulse):
-                rw_domain += data_r.eq(lro)
+            #latchregister(m, lro, data_r, ok & self.busy_o, name=name)
+            latchregister(m, lro, data_r, alu_pulse, name=name)
             with m.If(self.issue_i):
-                rw_domain += data_r.eq(0)
+                m.d.comb += data_r.eq(0)
             drl.append(data_r)
 
         # ok, above we collated anything with an "ok" on the output side
@@ -364,12 +366,15 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
 
         # read-release gated by busy (and read-mask)
-        bro = Repl(self.busy_o, self.n_src)
+        if True: #self.sync_rw: - experiment (doesn't work)
+            bro = Repl(self.busy_o, self.n_src)
+        else:
+            bro = Repl(self.busy_o|self.issue_i, self.n_src)
         m.d.comb += self.rd.rel_o.eq(src_l.q & bro & slg)
 
         # write-release gated by busy and by shadow (and write-mask)
         brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
-        m.d.comb += self.wr.rel_o.eq(req_l.q & brd)
+        m.d.comb += self.wr.rel_o.eq(req_l.q_int & brd)
 
         # output the data from the latch on go_write
         for i in range(self.n_dst):
index 6c8c669692fa6d8a0bf213b91533754208303ab4..971efa9edd3d8ab80e7d75f863a5384960d9454a 100644 (file)
@@ -168,7 +168,7 @@ class FunctionUnitBaseMulti(ReservationStations2):
             alu_name = "alu_%s%d" % (alu_name, idx)
             palu = self.pseudoalus[idx]
             cu = MultiCompUnit(regspec, palu, opsubset, name=alu_name,
-                               sync_rw=True)
+                               sync_rw=False)
             cu.fnunit = self.fnunit
             cu.fu_muxidx = idx
             self.cu.append(cu)
index 83f741e9d170d3801e68453886bc63e9a465ce04..cbb093d286c5c2ba4689c4c3e14421ac5725a938 100644 (file)
@@ -352,7 +352,7 @@ class TestRunner(FHDLTestCase):
 if __name__ == "__main__":
     unittest.main(exit=False)
     suite = unittest.TestSuite()
-    #suite.addTest(TestRunner(HazardTestCase().test_data))
+    suite.addTest(TestRunner(HazardTestCase().test_data))
     suite.addTest(TestRunner(RandomHazardTestCase().test_data))
     #suite.addTest(TestRunner(LDSTTestCase().test_data))
     #suite.addTest(TestRunner(CRTestCase().test_data))