From: Alain D D Williams Date: Tue, 30 Mar 2021 18:10:09 +0000 (+0100) Subject: Merge branch 'master' of git.libre-soc.org:soc X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=94bbe976e7a4efcc7091b9485d6c0bed892afe8c;hp=4ed6854f9fa1887692770c7c5d68878504cf40b6;p=soc.git Merge branch 'master' of git.libre-soc.org:soc --- diff --git a/Makefile b/Makefile index 20fe4921..abb446dd 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ mkpinmux: cp pinmux/ls180/ls180_pins.py src/soc/debug cp pinmux/ls180/ls180_pins.py src/soc/litex/florent/libresoc -install: develop mkpinmux +install: gitupdate develop mkpinmux svanalysis pywriter: python3 src/soc/decoder/pseudo/pywriter.py @@ -21,13 +21,14 @@ svanalysis: develop: python3 setup.py develop # yes, develop, not install - python3 src/soc/decoder/pseudo/pywriter.py -run_sim: install - python3 src/soc/simple/issuer_verilog.py --disable-svp64\ +# build and run libresoc litex simulation +run_sim: + python3 src/soc/simple/issuer_verilog.py --disable-svp64 \ src/soc/litex/florent/libresoc/libresoc.v python3 src/soc/litex/florent/sim.py --cpu=libresoc +# and with test gpio (useful for XICS IRC testing) testgpio_run_sim: python3 src/soc/simple/issuer_verilog.py \ src/soc/litex/florent/libresoc/libresoc.v \ @@ -38,9 +39,24 @@ testgpio_run_sim: ls180_verilog: python3 src/soc/simple/issuer_verilog.py \ --debug=jtag --enable-core --enable-pll \ - --enable-xics --enable-sram4x4kblock --disable-svp64 + --enable-xics --disable-svp64 \ src/soc/litex/florent/libresoc/libresoc.v +ls180_4k_verilog: + python3 src/soc/simple/issuer_verilog.py \ + --debug=jtag --enable-core --enable-pll \ + --enable-xics --enable-sram4x4kblock --disable-svp64 \ + src/soc/litex/florent/libresoc/libresoc.v + +# build the litex libresoc SoC without 4k SRAMs +ls180_verilog_build: ls180_verilog + make -C soc/soc/litex/florent ls180 + +# build the litex libresoc SoC with 4k SRAMs +ls180_4ksram_verilog_build: ls180_4k_verilog + make -C soc/soc/litex/florent ls1804k + +# testing (usually done at install time) test: install python3 setup.py test # could just run nosetest3... @@ -58,13 +74,16 @@ BUILDDIR = build help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +# copies all documentation to libre-soc (libre-soc admins only) +htmlupload: clean html + rsync -HPavz --delete build/html/* \ + libre-soc.org:/var/www/libre-soc.org/docs/soc/ + # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile + echo "catch-all falling through to sphinx for document building" mkdir -p "$(SOURCEDIR)"/src/gen sphinx-apidoc --ext-autodoc -o "$(SOURCEDIR)"/src/gen ./src/soc @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -htmlupload: clean html - rsync -HPavz --delete build/html/* \ - libre-soc.org:/var/www/libre-soc.org/docs/soc/ diff --git a/libreriscv b/libreriscv index 84cababe..59c4a072 160000 --- a/libreriscv +++ b/libreriscv @@ -1 +1 @@ -Subproject commit 84cababea353b8c1882eb3b1512438579c31d37c +Subproject commit 59c4a072402ffaf98cb43fc2a33084e7bf1f1a6b diff --git a/pinmux b/pinmux index 6ea0beaa..160c3204 160000 --- a/pinmux +++ b/pinmux @@ -1 +1 @@ -Subproject commit 6ea0beaabfa993fc6cb369ab1c5731d8f6a839c3 +Subproject commit 160c3204e21612289e5b1a0a1a1f8647e9f83870 diff --git a/src/soc/decoder/decode2execute1.py b/src/soc/decoder/decode2execute1.py index a6ac6262..6333dcdf 100644 --- a/src/soc/decoder/decode2execute1.py +++ b/src/soc/decoder/decode2execute1.py @@ -5,7 +5,8 @@ based on Anton Blanchard microwatt decode2.vhdl """ from nmigen import Signal, Record from nmutil.iocontrol import RecordObject -from soc.decoder.power_enums import MicrOp, CryIn, Function, SPR, LDSTMode +from soc.decoder.power_enums import (MicrOp, CryIn, Function, + SPRfull, SPRreduced, LDSTMode) from soc.consts import TT from soc.experiment.mem_types import LDSTException @@ -83,7 +84,13 @@ class Decode2ToOperand(IssuerDecode2ToOperand): class Decode2ToExecute1Type(RecordObject): - def __init__(self, name=None, asmcode=True, opkls=None, do=None): + def __init__(self, name=None, asmcode=True, opkls=None, do=None, + regreduce_en=False): + + if regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull if do is None and opkls is None: opkls = Decode2ToOperand diff --git a/src/soc/decoder/isa/mem.py b/src/soc/decoder/isa/mem.py index 7a806a40..15df1cbd 100644 --- a/src/soc/decoder/isa/mem.py +++ b/src/soc/decoder/isa/mem.py @@ -16,8 +16,6 @@ from copy import copy from soc.decoder.selectable_int import (FieldSelectableInt, SelectableInt, selectconcat) -from soc.decoder.power_enums import SPR as DEC_SPR - from soc.decoder.helpers import exts, gtu, ltu, undefined import math import sys @@ -49,7 +47,11 @@ class Mem: for i, val in enumerate(mem): initial_mem[startaddr + row_bytes*i] = (val, row_bytes) - for addr, (val, width) in initial_mem.items(): + for addr, val in initial_mem.items(): + if isinstance(val, tuple): + (val, width) = val + else: + width = row_bytes # assume same width #val = swap_order(val, width) self.st(addr, val, width, swap=False) diff --git a/src/soc/decoder/isa/radixmmu.py b/src/soc/decoder/isa/radixmmu.py index edb3c8bb..d1894e7d 100644 --- a/src/soc/decoder/isa/radixmmu.py +++ b/src/soc/decoder/isa/radixmmu.py @@ -198,7 +198,6 @@ def NLS(x): """ -testaddr = 0x10000 testmem = { 0x10000: # PARTITION_TABLE_2 (not implemented yet) @@ -208,7 +207,6 @@ testmem = { 0x30000: # RADIX_ROOT_PTE # V = 1 L = 0 NLB = 0x400 NLS = 9 0x8000000000040009, -######## 0x4000000 #### wrong address calculated by _get_pgtable_addr 0x40000: # RADIX_SECOND_LEVEL # V = 1 L = 1 SW = 0 RPN = 0 # R = 1 C = 1 ATT = 0 EAA 0x7 @@ -218,8 +216,36 @@ testmem = { # RTS1 = 0x2 RPDB = 0x300 RTS2 = 0x5 RPDS = 13 0x40000000000300ad, } - +# this one has a 2nd level RADIX with a RPN of 0x5000 +testmem2 = { + + 0x10000: # PARTITION_TABLE_2 (not implemented yet) + # PATB_GR=1 PRTB=0x1000 PRTS=0xb + 0x800000000100000b, + + 0x30000: # RADIX_ROOT_PTE + # V = 1 L = 0 NLB = 0x400 NLS = 9 + 0x8000000000040009, + 0x40000: # RADIX_SECOND_LEVEL + # V = 1 L = 1 SW = 0 RPN = 0x5000 + # R = 1 C = 1 ATT = 0 EAA 0x7 + 0xc000000005000187, + + 0x1000000: # PROCESS_TABLE_3 + # RTS1 = 0x2 RPDB = 0x300 RTS2 = 0x5 RPDS = 13 + 0x40000000000300ad, + } + + +testresult = """ + prtbl = 1000000 + DCACHE GET 1000000 PROCESS_TABLE_3 + DCACHE GET 30000 RADIX_ROOT_PTE V = 1 L = 0 + DCACHE GET 40000 RADIX_SECOND_LEVEL V = 1 L = 1 + DCACHE GET 10000 PARTITION_TABLE_2 +translated done 1 err 0 badtree 0 addr 40000 pte 0 +""" # see qemu/target/ppc/mmu-radix64.c for reference class RADIX: @@ -258,6 +284,7 @@ class RADIX: #shift = SelectableInt(0, 32) pte = self._walk_tree(addr, pgbase, mode, mbits, shift, priv) + # use pte to caclculate phys address return self.mem.ld(address, width, swap, check_in_mem) @@ -285,19 +312,27 @@ class RADIX: def _next_level(self, addr, entry_width, swap, check_in_mem): # implement read access to mmu mem here - value = 0 - if addr.value in testmem: - value = testmem[addr.value] - else: - print("not found") + value = self.mem.ld(addr.value, 8, False, check_in_mem) + assert(value is not None, "address lookup %x not found" % addr.value) - ##value = self.mem.ld(addr.value, entry_width, swap, check_in_mem) print("addr", hex(addr.value)) data = SelectableInt(value, 64) # convert to SelectableInt print("value", hex(value)) # index += 1 return data; + def _prtable_lookup(self, prtbl, addr, pid): + # v.shift := unsigned('0' & r.prtbl(4 downto 0)); + shift = prtbl[59:63] + print("shift",shift) + prtable_addr = self._get_prtable_addr(shift, prtbl, addr, pid) + print("prtable_addr",prtable_addr) + # TODO check and loop if needed + + assert(prtable_addr==0x1000000) + print("fetch data from PROCESS_TABLE_3") + return "TODO" + def _walk_tree(self, addr, pgbase, mode, mbits, shift, priv=1): """walk tree @@ -375,8 +410,15 @@ class RADIX: print("last 8 bits ----------") print + prtbl = SelectableInt(0x1000000,64) #FIXME do not hardcode + # get address of root entry + shift = selectconcat(SelectableInt(0,1),prtbl[58:63]) # TODO verify addr_next = self._get_prtable_addr(shift, prtbl, addr, pidr) + print("starting with prtable, addr_next",addr_next) + + assert(addr_next.bits == 64) + assert(addr_next.value == 0x1000000) #TODO addr_next = SelectableInt(0x30000,64) # radix root for testing @@ -396,9 +438,12 @@ class RADIX: if not valid: return "invalid" # TODO: return error if leaf: + print ("is leaf, checking perms") ok = self._check_perms(data, priv, mode) if ok == True: # data was ok, found phys address, return it? - return addr_next + paddr = self._get_pte(addrsh, addr, data) + print (" phys addr", hex(paddr.value)) + return paddr return ok # return the error code else: newlookup = self._new_lookup(data, mbits, shift) @@ -411,13 +456,11 @@ class RADIX: print(mask) #SelectableInt(value=0x9, bits=4) print(pgbase) #SelectableInt(value=0x40000, bits=56) print(shift) #SelectableInt(value=0x4, bits=16) #FIXME - pgbase = SelectableInt(pgbase.value,64) + pgbase = SelectableInt(pgbase.value, 64) addrsh = addrshift(addr,shift) addr_next = self._get_pgtable_addr(mask, pgbase, addrsh) print("addr_next",addr_next) print("addrsh",addrsh) - assert(addr_next == 0x40000) - return "TODO verify next level" def _new_lookup(self, data, mbits, shift): """ @@ -483,7 +526,7 @@ class RADIX: # below *directly* match the spec, unlike microwatt which # has to turn them around (to LE) mask = genmask(shift, 44) - nonzero = addr[1:32] & mask[13:44] # mask 31 LSBs (BE numbered 13:44) + nonzero = addr[2:33] & mask[13:44] # mask 31 LSBs (BE numbered 13:44) print ("RADIX _segment_check nonzero", bin(nonzero.value)) print ("RADIX _segment_check addr[0-1]", addr[0].value, addr[1].value) if addr[0] != addr[1] or nonzero != 0: @@ -572,16 +615,16 @@ class RADIX: (effpid(31 downto 8) and finalmask(23 downto 0))) & effpid(7 downto 0) & "0000"; """ - print ("_get_prtable_addr_", shift, prtbl, addr, pid) + print ("_get_prtable_addr", shift, prtbl, addr, pid) finalmask = genmask(shift, 44) finalmask24 = finalmask[20:44] if addr[0].value == 1: effpid = SelectableInt(0, 32) else: effpid = pid #self.pid # TODO, check on this - zero16 = SelectableInt(0, 16) + zero8 = SelectableInt(0, 8) zero4 = SelectableInt(0, 4) - res = selectconcat(zero16, + res = selectconcat(zero8, prtbl[8:28], # (prtbl[28:52] & ~finalmask24) | # (effpid[0:24] & finalmask24), # @@ -614,16 +657,22 @@ class RADIX: (r.addr(55 downto 12) and finalmask)) & r.pde(11 downto 0); """ + shift.value = 12 finalmask = genmask(shift, 44) zero8 = SelectableInt(0, 8) + rpn = pde[8:52] # RPN = Real Page Number + abits = addr[8:52] # non-masked address bits + print(" get_pte RPN", hex(rpn.value)) + print(" abits", hex(abits.value)) + print(" shift", shift.value) + print(" finalmask", bin(finalmask.value)) res = selectconcat(zero8, - (pde[8:52] & ~finalmask) | # - (addr[8:52] & finalmask), # - pde[52:64], + (rpn & ~finalmask) | # + (abits & finalmask), # + addr[52:64], ) return res - class TestRadixMMU(unittest.TestCase): def test_genmask(self): @@ -631,7 +680,7 @@ class TestRadixMMU(unittest.TestCase): mask = genmask(shift, 43) print (" mask", bin(mask.value)) - self.assertEqual(sum([1, 2, 3]), 6, "Should be 6") + self.assertEqual(mask.value, 0b11111, "mask should be 5 1s") def test_get_pgtable_addr(self): @@ -643,10 +692,84 @@ class TestRadixMMU(unittest.TestCase): pgbase = SelectableInt(0,64) addrsh = SelectableInt(0,16) ret = dut._get_pgtable_addr(mask_size, pgbase, addrsh) - print("ret=",ret) - assert(ret==0) + print("ret=", ret) + self.assertEqual(ret, 0, "pgtbl_addr should be 0") + + def test_prtable_lookup(self): + + mem = None + caller = None + dut = RADIX(mem, caller) + + prtbl = SelectableInt(0x1000000,64) + addr = SelectableInt(0, 64) + pid = SelectableInt(0, 64) + ret = dut._prtable_lookup(prtbl, addr, pid) + + def test_walk_tree_1(self): + + # test address as in + # https://github.com/power-gem5/gem5/blob/gem5-experimental/src/arch/power/radix_walk_example.txt#L65 + testaddr = 0x1000 + expected = 0x1000 + + # set up dummy minimal ISACaller + spr = {'DSISR': SelectableInt(0, 64), + 'DAR': SelectableInt(0, 64), + 'PIDR': SelectableInt(0, 64), + 'PRTBL': SelectableInt(0, 64) + } + # set problem state == 0 (other unit tests, set to 1) + msr = SelectableInt(0, 64) + msr[MSRb.PR] = 0 + class ISACaller: pass + caller = ISACaller() + caller.spr = spr + caller.msr = msr + + shift = SelectableInt(5, 6) + mask = genmask(shift, 43) + print (" mask", bin(mask.value)) + + mem = Mem(row_bytes=8, initial_mem=testmem) + mem = RADIX(mem, caller) + # ----------------------------------------------- + # |/|RTS1|/| RPDB | RTS2 | RPDS | + # ----------------------------------------------- + # |0|1 2|3|4 55|56 58|59 63| + data = SelectableInt(0, 64) + data[1:3] = 0b01 + data[56:59] = 0b11 + data[59:64] = 0b01101 # mask + data[55] = 1 + (rts, mbits, pgbase) = mem._decode_prte(data) + print (" rts", bin(rts.value), rts.bits) + print (" mbits", bin(mbits.value), mbits.bits) + print (" pgbase", hex(pgbase.value), pgbase.bits) + addr = SelectableInt(0x1000, 64) + check = mem._segment_check(addr, mbits, shift) + print (" segment check", check) + + print("walking tree") + addr = SelectableInt(testaddr,64) + # pgbase = None + mode = None + #mbits = None + shift = rts + result = mem._walk_tree(addr, pgbase, mode, mbits, shift) + print(" walking tree result", result) + print("should be", testresult) + self.assertEqual(result.value, expected, + "expected 0x%x got 0x%x" % (expected, + result.value)) + + + def test_walk_tree_2(self): + + # test address slightly different + testaddr = 0x1101 + expected = 0x5001101 - def test_walk_tree(self): # set up dummy minimal ISACaller spr = {'DSISR': SelectableInt(0, 64), 'DAR': SelectableInt(0, 64), @@ -665,7 +788,7 @@ class TestRadixMMU(unittest.TestCase): mask = genmask(shift, 43) print (" mask", bin(mask.value)) - mem = Mem(row_bytes=8) + mem = Mem(row_bytes=8, initial_mem=testmem2) mem = RADIX(mem, caller) # ----------------------------------------------- # |/|RTS1|/| RPDB | RTS2 | RPDS | @@ -692,6 +815,10 @@ class TestRadixMMU(unittest.TestCase): shift = rts result = mem._walk_tree(addr, pgbase, mode, mbits, shift) print(" walking tree result", result) + print("should be", testresult) + self.assertEqual(result.value, expected, + "expected 0x%x got 0x%x" % (expected, + result.value)) if __name__ == '__main__': diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index abed03ec..c0d523d6 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -27,7 +27,8 @@ from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_enums import (MicrOp, CryIn, Function, CRInSel, CROutSel, LdstLen, In1Sel, In2Sel, In3Sel, - OutSel, SPR, RC, LDSTMode, + OutSel, SPRfull, SPRreduced, + RC, LDSTMode, SVEXTRA, SVEtype, SVPtype) from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data, Decode2ToOperand) @@ -65,13 +66,23 @@ class SPRMap(Elaboratable): """SPRMap: maps POWER9 SPR numbers to internal enum values, fast and slow """ - def __init__(self): + def __init__(self, regreduce_en): + self.regreduce_en = regreduce_en + if regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull + self.spr_i = Signal(10, reset_less=True) self.spr_o = Data(SPR, name="spr_o") self.fast_o = Data(3, name="fast_o") def elaborate(self, platform): m = Module() + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull with m.Switch(self.spr_i): for i, x in enumerate(SPR): with m.Case(x.value): @@ -90,7 +101,12 @@ class DecodeA(Elaboratable): decodes register RA, implicit and explicit CSRs """ - def __init__(self, dec): + def __init__(self, dec, regreduce_en): + self.regreduce_en = regreduce_en + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull self.dec = dec self.sel_in = Signal(In1Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) @@ -104,7 +120,7 @@ class DecodeA(Elaboratable): comb = m.d.comb op = self.dec.op reg = self.reg_out - m.submodules.sprmap = sprmap = SPRMap() + m.submodules.sprmap = sprmap = SPRMap(self.regreduce_en) # select Register A field, if *full 7 bits* are zero (2 more from SVP64) ra = Signal(5, reset_less=True) @@ -325,7 +341,12 @@ class DecodeOut(Elaboratable): decodes output register RA, RT or SPR """ - def __init__(self, dec): + def __init__(self, dec, regreduce_en): + self.regreduce_en = regreduce_en + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull self.dec = dec self.sel_in = Signal(OutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) @@ -336,7 +357,7 @@ class DecodeOut(Elaboratable): def elaborate(self, platform): m = Module() comb = m.d.comb - m.submodules.sprmap = sprmap = SPRMap() + m.submodules.sprmap = sprmap = SPRMap(self.regreduce_en) op = self.dec.op reg = self.reg_out @@ -684,9 +705,10 @@ class PowerDecodeSubset(Elaboratable): only fields actually requested are copied over. hence, "subset" (duh). """ def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None, - svp64_en=True): + svp64_en=True, regreduce_en=False): self.svp64_en = svp64_en + self.regreduce_en = regreduce_en if svp64_en: self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field self.sv_a_nz = Signal(1) @@ -700,7 +722,8 @@ class PowerDecodeSubset(Elaboratable): # only needed for "main" PowerDecode2 if not self.final: - self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do) + self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do, + regreduce_en=regreduce_en) # create decoder if one not already given if dec is None: @@ -766,6 +789,10 @@ class PowerDecodeSubset(Elaboratable): return getattr(self.dec.op, op_field, None) def elaborate(self, platform): + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull m = Module() comb = m.d.comb state = self.state @@ -778,7 +805,8 @@ class PowerDecodeSubset(Elaboratable): name = "tmp" else: name = self.fn_name + "tmp" - self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls) + self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls, + regreduce_en=self.regreduce_en) # set up submodule decoders m.submodules.dec = self.dec @@ -909,8 +937,9 @@ class PowerDecode2(PowerDecodeSubset): """ def __init__(self, dec, opkls=None, fn_name=None, final=False, - state=None, svp64_en=True): - super().__init__(dec, opkls, fn_name, final, state, svp64_en) + state=None, svp64_en=True, regreduce_en=False): + super().__init__(dec, opkls, fn_name, final, state, svp64_en, + regreduce_en=False) self.exc = LDSTException("dec2_exc") if self.svp64_en: @@ -968,10 +997,10 @@ class PowerDecode2(PowerDecodeSubset): # copy over if non-exception, non-privileged etc. is detected # set up submodule decoders - m.submodules.dec_a = dec_a = DecodeA(self.dec) + m.submodules.dec_a = dec_a = DecodeA(self.dec, self.regreduce_en) m.submodules.dec_b = dec_b = DecodeB(self.dec) m.submodules.dec_c = dec_c = DecodeC(self.dec) - m.submodules.dec_o = dec_o = DecodeOut(self.dec) + m.submodules.dec_o = dec_o = DecodeOut(self.dec, self.regreduce_en) m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec) m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) diff --git a/src/soc/decoder/power_enums.py b/src/soc/decoder/power_enums.py index faa8cfc1..3353d202 100644 --- a/src/soc/decoder/power_enums.py +++ b/src/soc/decoder/power_enums.py @@ -8,6 +8,10 @@ Note: for SV, from v3.1B p12: The designated SPR sandbox consists of non-privileged SPRs 704-719 and privileged SPRs 720-735. + +Note: the option exists to select a much shorter list of SPRs, to reduce +regfile size in HDL. this is SPRreduced and the supported list is in +get_spr_enum """ from enum import Enum, unique @@ -430,19 +434,35 @@ class CROutSel(Enum): # http://libre-riscv.org/openpower/isatables/sprs.csv # http://bugs.libre-riscv.org/show_bug.cgi?id=261 -spr_csv = get_csv("sprs.csv") -spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx') -spr_dict = {} -spr_byname = {} -for row in spr_csv: - info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'], - priv_mfspr=row['priv_mfspr'], length=int(row['len']), - idx=int(row['Idx'])) - spr_dict[int(row['Idx'])] = info - spr_byname[row['SPR']] = info -fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv] -SPR = Enum('SPR', fields) - +def get_spr_enum(full_file): + """get_spr_enum - creates an Enum of SPRs, dynamically + has the option to reduce the enum to a much shorter list. + this saves drastically on the size of the regfile + """ + short_list = {'PIDR', 'DAR', 'PRTBL', 'DSISR', 'SVSRR0', 'SVSTATE', + 'SPRG0_priv', 'SPRG1_priv', 'SPRG2_priv', 'SPRG3_priv', + 'SPRG3' + } + spr_csv = [] + for row in get_csv("sprs.csv"): + if full_file or row['SPR'] in short_list: + spr_csv.append(row) + + spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx') + spr_dict = {} + spr_byname = {} + for row in spr_csv: + info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'], + priv_mfspr=row['priv_mfspr'], length=int(row['len']), + idx=int(row['Idx'])) + spr_dict[int(row['Idx'])] = info + spr_byname[row['SPR']] = info + fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv] + SPR = Enum('SPR', fields) + return SPR, spr_dict, spr_byname + +SPRfull, spr_dict, spr_byname = get_spr_enum(full_file=True) +SPRreduced, _, _ = get_spr_enum(full_file=False) XER_bits = { 'SO': 32, @@ -454,11 +474,13 @@ XER_bits = { if __name__ == '__main__': # find out what the heck is in SPR enum :) - print("sprs", len(SPR)) - print(dir(SPR)) + print("sprs full", len(SPRfull)) + print(dir(SPRfull)) + print("sprs reduced", len(SPRreduced)) + print(dir(SPRreduced)) print(dir(Enum)) - print(SPR.__members__['TAR']) - for x in SPR: + print(SPRfull.__members__['TAR']) + for x in SPRfull: print(x, x.value, str(x), x.name) print("function", Function.ALU.name) diff --git a/src/soc/decoder/pseudo/pywriter.py b/src/soc/decoder/pseudo/pywriter.py index a274dccb..77ff775e 100644 --- a/src/soc/decoder/pseudo/pywriter.py +++ b/src/soc/decoder/pseudo/pywriter.py @@ -131,15 +131,16 @@ class PyISAWriter(ISA): if __name__ == '__main__': isa = PyISAWriter() + write_isa_class = True if len(sys.argv) == 1: # quick way to do it print(dir(isa)) sources = isa.page.keys() else: sources = sys.argv[1:] - write_isa_class = True - if sources[0] == "noall": # don't rewrite all.py - write_isa_class = False - sources.pop(0) + if sources[0] == "noall": # don't rewrite all.py + write_isa_class = False + sources.pop(0) + print ("sources", write_isa_class, sources) for source in sources: isa.write_pysource(source) isa.patch_if_needed(source) diff --git a/src/soc/fu/alu/test/svp64_cases.py b/src/soc/fu/alu/test/svp64_cases.py index d7341224..18da37ae 100644 --- a/src/soc/fu/alu/test/svp64_cases.py +++ b/src/soc/fu/alu/test/svp64_cases.py @@ -228,7 +228,7 @@ class SVP64ALUTestCase(TestAccumulatorBase): # expected results: # r5 = 0x0 dest r3 is 0b10: skip # r6 = 0xffff_ffff_ffff_ff91 2nd bit of r3 is 1 - isa = SVP64Asm(['sv.extsb/sm=~r3/m=r3 5.v, 9.v']) + isa = SVP64Asm(['sv.extsb/sm=~r3/dm=r3 5.v, 9.v']) lst = list(isa) print("listing", lst) @@ -246,7 +246,6 @@ class SVP64ALUTestCase(TestAccumulatorBase): self.add_case(Program(lst, bigendian), initial_regs, initial_svstate=svstate) - @skip_case("Predication not implemented yet") def case_10_intpred_vcompress(self): # reg num 0 1 2 3 4 5 6 7 8 9 10 11 # src r3=0b101 Y N Y @@ -293,7 +292,7 @@ class SVP64ALUTestCase(TestAccumulatorBase): # r5 = 0xffff_ffff_ffff_ff90 1st bit of r3 is 1 # r6 = 0x0 skip # r7 = 0xffff_ffff_ffff_ff91 3nd bit of r3 is 1 - isa = SVP64Asm(['sv.extsb/m=r3 5.v, 9.v']) + isa = SVP64Asm(['sv.extsb/dm=r3 5.v, 9.v']) lst = list(isa) print("listing", lst) @@ -311,3 +310,35 @@ class SVP64ALUTestCase(TestAccumulatorBase): self.add_case(Program(lst, bigendian), initial_regs, initial_svstate=svstate) + + @skip_case("Predication not implemented yet") + def case_12_sv_twinpred(self): + # reg num 0 1 2 3 4 5 6 7 8 9 10 11 + # src r3=0b101 Y N Y + # | + # +-----+ + # | + # dest ~r3=0b010 N Y N + + # expected results: + # r5 = 0x0 dest ~r3 is 0b010: skip + # r6 = 0xffff_ffff_ffff_ff90 2nd bit of ~r3 is 1 + # r7 = 0x0 dest ~r3 is 0b010: skip + isa = SVP64Asm(['sv.extsb/sm=r3/dm=~r3 5.v, 9.v']) + lst = list(isa) + print("listing", lst) + + # initial values in GPR regfile + initial_regs = [0] * 32 + initial_regs[3] = 0b101 # predicate mask + initial_regs[9] = 0x90 # source r3 is 0b101 so this will be used + initial_regs[10] = 0x91 # this gets skipped + initial_regs[11] = 0x92 # VL loop runs out before we can use it + # SVSTATE (in this case, VL=3) + svstate = SVP64State() + svstate.vl[0:7] = 3 # VL + svstate.maxvl[0:7] = 3 # MAXVL + print("SVSTATE", bin(svstate.spr.asint())) + + self.add_case(Program(lst, bigendian), initial_regs, + initial_svstate=svstate) diff --git a/src/soc/fu/mmu/fsm.py b/src/soc/fu/mmu/fsm.py index bd0c36ee..76cff311 100644 --- a/src/soc/fu/mmu/fsm.py +++ b/src/soc/fu/mmu/fsm.py @@ -10,7 +10,7 @@ from soc.experiment.dcache import DCache from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange from soc.decoder.power_decoder2 import decode_spr_num -from soc.decoder.power_enums import MicrOp, SPR, XER_bits +from soc.decoder.power_enums import MicrOp, XER_bits from soc.experiment.pimem import PortInterface from soc.experiment.pimem import PortInterfaceBase diff --git a/src/soc/fu/spr/main_stage.py b/src/soc/fu/spr/main_stage.py index 1a1d5d4e..cca0c24e 100644 --- a/src/soc/fu/spr/main_stage.py +++ b/src/soc/fu/spr/main_stage.py @@ -7,7 +7,7 @@ from nmigen import (Module, Signal, Cat) from nmutil.pipemodbase import PipeModBase from soc.fu.spr.pipe_data import SPRInputData, SPROutputData -from soc.decoder.power_enums import MicrOp, SPR, XER_bits +from soc.decoder.power_enums import MicrOp, SPRfull, SPRreduced, XER_bits from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange @@ -17,6 +17,10 @@ from soc.decoder.power_decoder2 import decode_spr_num class SPRMainStage(PipeModBase): def __init__(self, pspec): super().__init__(pspec, "spr_main") + # test if regfiles are reduced + self.regreduce_en = (hasattr(pspec, "regreduce") and + (pspec.regreduce == True)) + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) self.fields.create_specs() @@ -27,6 +31,10 @@ class SPRMainStage(PipeModBase): return SPROutputData(self.pspec) def elaborate(self, platform): + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull m = Module() comb = m.d.comb op = self.i.ctx.op diff --git a/src/soc/litex/florent b/src/soc/litex/florent index cba78e3d..47083f35 160000 --- a/src/soc/litex/florent +++ b/src/soc/litex/florent @@ -1 +1 @@ -Subproject commit cba78e3d60e6f67ce99adbdcbf6a09f9728dc849 +Subproject commit 47083f3531935d83fd1dfe98faf465cad8804cff diff --git a/src/soc/regfile/regfiles.py b/src/soc/regfile/regfiles.py index 2512d3ae..167ae511 100644 --- a/src/soc/regfile/regfiles.py +++ b/src/soc/regfile/regfiles.py @@ -26,7 +26,7 @@ Links: from soc.regfile.regfile import RegFile, RegFileArray, RegFileMem from soc.regfile.virtual_port import VirtualRegPort -from soc.decoder.power_enums import SPR +from soc.decoder.power_enums import SPRfull, SPRreduced # "State" Regfile @@ -48,7 +48,7 @@ class StateRegs(RegFileArray): PC = 0 MSR = 1 SVSTATE = 2 - def __init__(self): + def __init__(self, svp64_en=False, regreduce_en=False): super().__init__(64, 3) self.w_ports = {'nia': self.write_port("nia"), 'msr': self.write_port("msr"), @@ -69,16 +69,21 @@ class IntRegs(RegFileMem): #class IntRegs(RegFileArray): * Array-based unary-indexed (not binary-indexed) * write-through capability (read on same cycle as write) """ - def __init__(self): + def __init__(self, svp64_en=False, regreduce_en=False): super().__init__(64, 32) self.w_ports = {'o': self.write_port("dest1"), - #'o1': self.write_port("dest2") # for now (LD/ST update) } - self.r_ports = {'ra': self.read_port("src1"), - 'rb': self.read_port("src2"), - 'rc': self.read_port("src3"), - 'pred': self.read_port("pred"), # for predicate mask + self.r_ports = { 'dmi': self.read_port("dmi")} # needed for Debug (DMI) + if svp64_en: + self.r_ports['pred'] = self.read_port("pred") # for predicate mask + if not regreduce_en: + self.w_ports['o1'] = self.write_port("dest2") # (LD/ST update) + self.r_ports['ra'] = self.read_port("src1") + self.r_ports['rb'] = self.read_port("src2") + self.r_ports['rc'] = self.read_port("src3") + else: + self.r_ports['rabc'] = self.read_port("src1") # Fast SPRs Regfile @@ -103,15 +108,16 @@ class FastRegs(RegFileMem): #RegFileArray): DEC = 6 TB = 7 N_REGS = 8 # maximum number of regs - def __init__(self): + def __init__(self, svp64_en=False, regreduce_en=False): super().__init__(64, self.N_REGS) self.w_ports = {'fast1': self.write_port("dest1"), 'issue': self.write_port("issue"), # writing DEC/TB } self.r_ports = {'fast1': self.read_port("src1"), - 'fast2': self.read_port("src2"), 'issue': self.read_port("issue"), # reading DEC/TB } + if not regreduce_en: + self.r_ports['fast2'] = self.read_port("src2") # CR Regfile @@ -123,17 +129,18 @@ class CRRegs(VirtualRegPort): * Array-based unary-indexed (not binary-indexed) * write-through capability (read on same cycle as write) """ - def __init__(self): + def __init__(self, svp64_en=False, regreduce_en=False): super().__init__(32, 8, rd2=True) self.w_ports = {'full_cr': self.full_wr, # 32-bit (masked, 8-en lines) 'cr_a': self.write_port("dest1"), # 4-bit, unary-indexed 'cr_b': self.write_port("dest2")} # 4-bit, unary-indexed self.r_ports = {'full_cr': self.full_rd, # 32-bit (masked, 8-en lines) 'full_cr_dbg': self.full_rd2, # for DMI - 'cr_pred': self.read_port("cr_pred"), # for predicate 'cr_a': self.read_port("src1"), 'cr_b': self.read_port("src2"), 'cr_c': self.read_port("src3")} + if svp64_en: + self.r_ports['cr_pred'] = self.read_port("cr_pred") # for predicate # XER Regfile @@ -148,7 +155,7 @@ class XERRegs(VirtualRegPort): SO=0 # this is actually 2-bit but we ignore 1 bit of it CA=1 # CA and CA32 OV=2 # OV and OV32 - def __init__(self): + def __init__(self, svp64_en=False, regreduce_en=False): super().__init__(6, 3) self.w_ports = {'full_xer': self.full_wr, # 6-bit (masked, 3-en lines) 'xer_so': self.write_port("dest1"), @@ -169,8 +176,11 @@ class SPRRegs(RegFileMem): * binary-indexed but REQUIRES MAPPING * write-through capability (read on same cycle as write) """ - def __init__(self): - n_sprs = len(SPR) + def __init__(self, svp64_en=False, regreduce_en=False): + if regreduce_en: + n_sprs = len(SPRreduced) + else: + n_sprs = len(SPRfull) super().__init__(width=64, depth=n_sprs) self.w_ports = {'spr1': self.write_port("spr1")} self.r_ports = {'spr1': self.read_port("spr1")} @@ -178,7 +188,14 @@ class SPRRegs(RegFileMem): # class containing all regfiles: int, cr, xer, fast, spr class RegFiles: - def __init__(self): + def __init__(self, pspec): + # test is SVP64 is to be enabled + svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) + + # and regfile port reduction + regreduce_en = hasattr(pspec, "regreduce") and \ + (pspec.regreduce == True) + self.rf = {} # create regfiles here, Factory style for (name, kls) in [('int', IntRegs), @@ -187,7 +204,7 @@ class RegFiles: ('fast', FastRegs), ('state', StateRegs), ('spr', SPRRegs),]: - rf = self.rf[name] = kls() + rf = self.rf[name] = kls(svp64_en, regreduce_en) # also add these as instances, self.state, self.fast, self.cr etc. setattr(self, name, rf) diff --git a/src/soc/regfile/util.py b/src/soc/regfile/util.py index 5ccc4c54..e5f095dc 100644 --- a/src/soc/regfile/util.py +++ b/src/soc/regfile/util.py @@ -1,6 +1,8 @@ from soc.regfile.regfiles import FastRegs -from soc.decoder.power_enums import SPR, spr_dict +from soc.decoder.power_enums import SPRfull as SPR, spr_dict +# note that we can get away with using SPRfull here because the values +# (numerical values) are what is used for lookup. spr_to_fast = { SPR.CTR: FastRegs.CTR, SPR.LR: FastRegs.LR, SPR.TAR: FastRegs.TAR, diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index 23e12f37..91e639a5 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -71,6 +71,13 @@ class NonProductionCore(Elaboratable): def __init__(self, pspec): self.pspec = pspec + # test is SVP64 is to be enabled + self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) + + # test to see if regfile ports should be reduced + self.regreduce_en = (hasattr(pspec, "regreduce") and + (pspec.regreduce == True)) + # single LD/ST funnel for memory access self.l0 = TstL0CacheBuffer(pspec, n_units=1) pi = self.l0.l0.dports[0] @@ -80,10 +87,11 @@ class NonProductionCore(Elaboratable): self.fus = AllFunctionUnits(pspec, pilist=[pi]) # register files (yes plural) - self.regs = RegFiles() + self.regs = RegFiles(pspec) # instruction decoder - needs a Trap-capable Record (captures EINT etc.) - self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand) + self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand, + regreduce_en=self.regreduce_en) # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero self.sv_a_nz = Signal() @@ -115,7 +123,9 @@ class NonProductionCore(Elaboratable): continue self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name, final=True, - state=self.state) + state=self.state, + svp64_en=self.svp64_en, + regreduce_en=self.regreduce_en) self.des[funame] = self.decoders[funame].do if "mmu0" in self.decoders: @@ -339,14 +349,15 @@ class NonProductionCore(Elaboratable): # argh. an experiment to merge RA and RB in the INT regfile # (we have too many read/write ports) - #if regfile == 'INT': - #fuspecs['rabc'] = [fuspecs.pop('rb')] - #fuspecs['rabc'].append(fuspecs.pop('rc')) - #fuspecs['rabc'].append(fuspecs.pop('ra')) - #if regfile == 'FAST': - # fuspecs['fast1'] = [fuspecs.pop('fast1')] - # if 'fast2' in fuspecs: - # fuspecs['fast1'].append(fuspecs.pop('fast2')) + if self.regreduce_en: + if regfile == 'INT': + fuspecs['rabc'] = [fuspecs.pop('rb')] + fuspecs['rabc'].append(fuspecs.pop('rc')) + fuspecs['rabc'].append(fuspecs.pop('ra')) + if regfile == 'FAST': + fuspecs['fast1'] = [fuspecs.pop('fast1')] + if 'fast2' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast2')) # for each named regfile port, connect up all FUs to that port for (regname, fspec) in sort_fuspecs(fuspecs): @@ -464,14 +475,15 @@ class NonProductionCore(Elaboratable): fuspecs = byregfiles_wrspec[regfile] wrpickers[regfile] = {} - # argh, more port-merging - if regfile == 'INT': - fuspecs['o'] = [fuspecs.pop('o')] - fuspecs['o'].append(fuspecs.pop('o1')) - if regfile == 'FAST': - fuspecs['fast1'] = [fuspecs.pop('fast1')] - if 'fast2' in fuspecs: - fuspecs['fast1'].append(fuspecs.pop('fast2')) + if self.regreduce_en: + # argh, more port-merging + if regfile == 'INT': + fuspecs['o'] = [fuspecs.pop('o')] + fuspecs['o'].append(fuspecs.pop('o1')) + if regfile == 'FAST': + fuspecs['fast1'] = [fuspecs.pop('fast1')] + if 'fast2' in fuspecs: + fuspecs['fast1'].append(fuspecs.pop('fast2')) for (regname, fspec) in sort_fuspecs(fuspecs): self.connect_wrport(m, fu_bitdict, wrpickers, diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index aaed3508..e2f66a50 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -16,11 +16,13 @@ improved. """ from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal, - ClockDomain, DomainRenamer, Mux, Const, Repl) + ClockDomain, DomainRenamer, Mux, Const, Repl, Cat) from nmigen.cli import rtlil from nmigen.cli import main import sys +from nmigen.lib.coding import PriorityEncoder + from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder from soc.decoder.decode2execute1 import IssuerDecode2ToOperand @@ -158,6 +160,10 @@ class TestIssuerInternal(Elaboratable): # test is SVP64 is to be enabled self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) + # and if regfiles are reduced + self.regreduce_en = (hasattr(pspec, "regreduce") and + (pspec.regreduce == True)) + # JTAG interface. add this right at the start because if it's # added it *modifies* the pspec, by adding enable/disable signals # for parts of the rest of the core @@ -207,7 +213,8 @@ class TestIssuerInternal(Elaboratable): self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE) self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state, opkls=IssuerDecode2ToOperand, - svp64_en=self.svp64_en) + svp64_en=self.svp64_en, + regreduce_en=self.regreduce_en) if self.svp64_en: self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix @@ -241,9 +248,10 @@ class TestIssuerInternal(Elaboratable): self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read self.xer_r = xerrf.r_ports['full_xer'] # XER read - # for predication - self.int_pred = intrf.r_ports['pred'] # INT predicate read - self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read + if self.svp64_en: + # for predication + self.int_pred = intrf.r_ports['pred'] # INT predicate read + self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read # hack method of keeping an eye on whether branch/trap set the PC self.state_nia = self.core.regs.rf['state'].w_ports['nia'] @@ -384,6 +392,8 @@ class TestIssuerInternal(Elaboratable): later, a faster way would be to use the 32-bit-wide CR port but this is more complex decoding, here. equivalent code used in ISACaller is "from soc.decoder.isa.caller import get_predcr" + + note: this ENTIRE FSM is not to be called when svp64 is disabled """ comb = m.d.comb sync = m.d.sync @@ -533,17 +543,10 @@ class TestIssuerInternal(Elaboratable): comb += update_svstate.eq(1) sync += sv_changed.eq(1) - # decode the instruction when it arrives + # wait for an instruction to arrive from Fetch with m.State("INSN_WAIT"): comb += fetch_insn_ready_i.eq(1) with m.If(fetch_insn_valid_o): - # decode the instruction - sync += core.e.eq(pdecode2.e) - sync += core.state.eq(cur_state) - sync += core.raw_insn_i.eq(dec_opcode_i) - sync += core.bigendian_i.eq(self.core_bigendian_i) - # set RA_OR_ZERO detection in satellite decoders - sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) # loop into ISSUE_START if it's a SVP64 instruction # and VL == 0. this because VL==0 is a for-loop # from 0 to 0 i.e. always, always a NOP. @@ -560,7 +563,7 @@ class TestIssuerInternal(Elaboratable): if self.svp64_en: m.next = "PRED_START" # start fetching predicate else: - m.next = "INSN_EXECUTE" # skip predication + m.next = "DECODE_SV" # skip predication with m.State("PRED_START"): comb += pred_insn_valid_i.eq(1) # tell fetch_pred to start @@ -570,42 +573,79 @@ class TestIssuerInternal(Elaboratable): with m.State("MASK_WAIT"): comb += pred_mask_ready_i.eq(1) # ready to receive the masks with m.If(pred_mask_valid_o): # predication masks are ready - m.next = "INSN_EXECUTE" - - # handshake with execution FSM, move to "wait" once acknowledged - with m.State("INSN_EXECUTE"): - # with m.If(is_svp64_mode): - # TODO advance src/dst step to "skip" over predicated-out - # from self.srcmask and self.dstmask - # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3 - # but still without exceeding VL in either case - # IMPORTANT: when changing src/dest step, have to - # jump to m.next = "DECODE_SV" to deal with the change in - # SVSTATE - - with m.If(is_svp64_mode): + m.next = "PRED_SKIP" + # skip zeros in predicate + with m.State("PRED_SKIP"): + with m.If(~is_svp64_mode): + m.next = "DECODE_SV" # nothing to do + with m.Else(): if self.svp64_en: pred_src_zero = pdecode2.rm_dec.pred_sz pred_dst_zero = pdecode2.rm_dec.pred_dz - """ - if not pred_src_zero: - if (((1<> (src_delta+1)) + + # same as above, but for dststep + skip_dststep = Signal.like(cur_dststep) + dst_delta = Signal.like(cur_dststep) + with m.If(~pred_dst_zero): + pri_enc_dst = PriorityEncoder(65) + m.submodules.pri_enc_dst = pri_enc_dst + comb += pri_enc_dst.i.eq(Cat(self.dstmask, 1)) + comb += dst_delta.eq(pri_enc_dst.o) + comb += skip_dststep.eq(cur_dststep + dst_delta) + sync += self.dstmask.eq(self.dstmask >> (dst_delta+1)) + + # TODO: initialize mask[VL]=1 to avoid passing past VL + with m.If((skip_srcstep >= cur_vl) | + (skip_dststep >= cur_vl)): + # end of VL loop. Update PC and reset src/dst step + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.data_i.eq(nia) + comb += new_svstate.srcstep.eq(0) + comb += new_svstate.dststep.eq(0) comb += update_svstate.eq(1) - comb += new_svstate.srcstep.eq(next_srcstep) - - if not pred_dst_zero: - if (((1<