From 6b41f65aa7a42837d14d172c57b261b0e660291a Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 13:32:41 -0500 Subject: [PATCH] Add initial support for external DRAM init on the Raptor Versa ECP5-85 board --- LICENSE | 1 + examples/ecp5_crg.py | 257 ++++++++++++++++++++++++++++++++++ examples/headless-versa-85.py | 84 +++++++++++ examples/headless/main.c | 71 +++++++++- libgram/Makefile | 4 +- libgram/src/calibration.c | 16 +++ 6 files changed, 429 insertions(+), 4 deletions(-) create mode 100644 examples/ecp5_crg.py create mode 100644 examples/headless-versa-85.py diff --git a/LICENSE b/LICENSE index 991cbcf..c4f0af0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Unless otherwise noted, Gram is Copyright 2020 / LambdaConcept +ECP5 DDR3 fixes and extensions Copyright 2022 Raptor Engineering, LLC Initial development is based on MiSoC's LASMICON / Copyright 2007-2016 / M-Labs LiteDRAM / Copyright 2012-2018 / EnjoyDigital
diff --git a/examples/ecp5_crg.py b/examples/ecp5_crg.py
new file mode 100644
index 0000000..5c975d6
--- /dev/null
+++ b/examples/ecp5_crg.py
@@ -0,0 +1,257 @@
+# Copyright (c) 2020 LambdaConcept
+# Copyright (c) 2021 Luke Kenneth Casson Leighton
+# Copyright (c) 2018-2020 Florent Kermarrec
+# Copyright (c) 2019 Michael Betz
+#
+# Based on code from LambdaConcept, from the gram example which is BSD-2-License
+# https://github.com/jeanthom/gram/tree/master/examples
+#
+# Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
+# under EU Grants 871528 and 957073, under the LGPLv3+ License
+
+
+from nmigen import (Elaboratable, Module, Signal, ClockDomain, Instance,
+                    ClockSignal, ResetSignal, Const)
+
+__all__ = ["ECP5CRG"]
+
+
+class PLL(Elaboratable):
+    """Compute EHXPLLL divider settings and instantiate the primitive.
+
+    Call set_clkin_freq(), then create_clkout() up to nclkouts_max times,
+    before the PLL is elaborated.  CLKOS3 is reserved for feedback, which
+    leaves the three outputs CLKOP, CLKOS and CLKOS2 for users.
+    """
+    nclkouts_max = 3
+    clki_div_range = (1, 128+1)
+    clkfb_div_range = (1, 128+1)
+    clko_div_range = (1, 128+1)
+    clki_freq_range = (8e6, 400e6)
+    clko_freq_range = (3.125e6, 400e6)
+    vco_freq_range = (400e6, 800e6)
+
+    def __init__(self, clkin, clksel=None, reset=None, locked=None):
+        """Store the input clock and create any signal not supplied.
+
+        clksel, reset and locked are created per instance when left as
+        None; Signal() defaults in the signature would be built once, at
+        definition time, and silently shared by every PLL.
+        """
+        if clksel is None:
+            clksel = Signal(shape=2, reset=2)
+        if reset is None:
+            reset = Signal(reset_less=True)
+        if locked is None:
+            locked = Signal()
+        self.clkin = clkin
+        self.clkin_freq = None
+        self.clksel = clksel
+        self.locked = locked
+        self.reset = reset
+        self.nclkouts = 0
+        self.clkouts = {}
+        self.config = {}
+        self.params = {}
+
+    def ports(self):
+        """Return the input, select, lock and requested output signals."""
+        # clkouts maps index -> (clk, freq, phase, margin); only clk is a port
+        return [
+            self.clkin,
+            self.clksel,
+            self.locked,
+        ] + [clk for (clk, _f, _p, _m) in self.clkouts.values()]
+
+    def set_clkin_freq(self, freq):
+        """Set the input clock frequency in Hz (within clki_freq_range)."""
+        (clki_freq_min, clki_freq_max) = self.clki_freq_range
+        assert freq >= clki_freq_min
+        assert freq <= clki_freq_max
+        self.clkin_freq = freq
+
+    def create_clkout(self, cd, freq, phase=0, margin=1e-2):
+        """Request output `cd` at `freq` Hz with `phase` degrees of shift.
+
+        margin is the accepted relative frequency error (1e-2 means 1%).
+        """
+        (clko_freq_min, clko_freq_max) = self.clko_freq_range
+        assert freq >= clko_freq_min
+        assert freq <= clko_freq_max
+        assert self.nclkouts < self.nclkouts_max
+        self.clkouts[self.nclkouts] = (cd, freq, phase, margin)
+        #create_clkout_log(self.logger, cd.name, freq, margin, self.nclkouts)
+        print("clock domain", cd.domain, freq, margin, self.nclkouts)
+        self.nclkouts += 1
+
+    def compute_config(self):
+        """Search for dividers that satisfy every requested output clock.
+
+        Returns a dict with clki_div, clkfb_div, vco and, per output n,
+        clko{n}_freq, clko{n}_div and clko{n}_phase.  Raises ValueError
+        when the input frequency is unset or no divider set fits.
+        """
+        if self.clkin_freq is None:
+            raise ValueError("set_clkin_freq() must be called first")
+        config = {}
+        for clki_div in range(*self.clki_div_range):
+            config["clki_div"] = clki_div
+            for clkfb_div in range(*self.clkfb_div_range):
+                all_valid = True
+                vco_freq = self.clkin_freq/clki_div*clkfb_div*1 # clkos3_div=1
+                (vco_freq_min, vco_freq_max) = self.vco_freq_range
+                if vco_freq >= vco_freq_min and vco_freq <= vco_freq_max:
+                    for n, (clk, f, p, m) in sorted(self.clkouts.items()):
+                        valid = False
+                        for d in range(*self.clko_div_range):
+                            clk_freq = vco_freq/d
+                            if abs(clk_freq - f) <= f*m:
+                                config["clko{}_freq".format(n)] = clk_freq
+                                config["clko{}_div".format(n)] = d
+                                config["clko{}_phase".format(n)] = p
+                                valid = True
+                                break
+                        if not valid:
+                            all_valid = False
+                else:
+                    all_valid = False
+                if all_valid:
+                    config["vco"] = vco_freq
+                    config["clkfb_div"] = clkfb_div
+                    #compute_config_log(self.logger, config)
+                    print ("PLL config", config)
+                    return config
+        raise ValueError("No PLL config found")
+
+    def elaborate(self, platform):
+        """Build the EHXPLLL instance from the computed configuration.
+
+        CLKOS3 is enabled with divider 1 and used as internal feedback, so
+        the VCO runs at clkin_freq / clki_div * clkfb_div.  clksel is only
+        kept as an attribute; it is not connected to the primitive here.
+        """
+        config = self.compute_config()
+        self.params.update(
+            # attributes
+            a_FREQUENCY_PIN_CLKI = str(self.clkin_freq/1e6),
+            a_ICP_CURRENT = "6",
+            a_LPF_RESISTOR = "16",
+            a_MFG_ENABLE_FILTEROPAMP = "1",
+            a_MFG_GMCREF_SEL = "2",
+            # parameters
+            p_FEEDBK_PATH = "INT_OS3", # CLKOS3 rsvd for feedback with div=1.
+            p_CLKOS3_ENABLE = "ENABLED",
+            p_CLKOS3_DIV = 1,
+            p_CLKFB_DIV = config["clkfb_div"],
+            p_CLKI_DIV = config["clki_div"],
+            # reset, input clock, lock-achieved output
+            i_RST = self.reset,
+            i_CLKI = self.clkin,
+            o_LOCK = self.locked,
+        )
+        # for each clock-out, set additional parameters
+        for n, (clk, f, p, m) in sorted(self.clkouts.items()):
+            n_to_l = {0: "P", 1: "S", 2: "S2"}
+            div = config["clko{}_div".format(n)]
+            # CPHASE from phase p (degrees) and the divider; FPHASE stays 0
+            cphase = int(p*(div + 1)/360 + div)
+            self.params["p_CLKO{}_ENABLE".format(n_to_l[n])] = "ENABLED"
+            self.params["p_CLKO{}_DIV".format(n_to_l[n])] = div
+            self.params["p_CLKO{}_FPHASE".format(n_to_l[n])] = 0
+            self.params["p_CLKO{}_CPHASE".format(n_to_l[n])] = cphase
+            self.params["o_CLKO{}".format(n_to_l[n])] = clk
+
+        m = Module()
+        print ("params", self.params)
+        pll = Instance("EHXPLLL", **self.params)
+        m.submodules.pll = pll
+        return m
+
+
+class ECP5CRG(Elaboratable):
+    """Clock/reset generator providing sync2x, sync, dramsync and init domains."""
+    def __init__(self, sys_clk_freq=100e6, pod_bits=25):
+        self.sys_clk_freq = sys_clk_freq
+        self.pod_bits = pod_bits
+
+    def elaborate(self, platform):
+        m = Module()
+        # Get 100Mhz from oscillator
+        extclk = platform.request(platform.default_clk)
+        cd_rawclk = ClockDomain("rawclk", local=True, reset_less=True)
+        m.d.comb += cd_rawclk.clk.eq(extclk)
+        m.domains += cd_rawclk
+
+        # Reset
+        if platform.default_rst is not None:
+            reset = platform.request(platform.default_rst).i
+        else:
+            reset = Const(0) # whoops
+
+        gsr0 = Signal()
+        gsr1 = Signal()
+        m.submodules += [
+            Instance("FD1S3AX", p_GSR="DISABLED",
+                     i_CK=ClockSignal("rawclk"),
+                     i_D=~reset,
+                     o_Q=gsr0),
+            Instance("FD1S3AX", p_GSR="DISABLED",
+                     i_CK=ClockSignal("rawclk"),
+                     i_D=gsr0,
+                     o_Q=gsr1),
+            Instance("SGSR", i_CLK=ClockSignal("rawclk"),
+                     i_GSR=gsr1),
+        ]
+
+        # PLL.  NOTE(review): the GSR chain above and the PLL reset are both
+        # fed ~reset; confirm both primitives expect the same polarity.
+        m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset)
+
+        # Power-on delay: podcnt counts 2**pod_bits-1 rawclk cycles while locked
+        podcnt = Signal(self.pod_bits, reset=-1)
+        pod_done = Signal()
+        with m.If((podcnt != 0) & pll.locked):
+            m.d.rawclk += podcnt.eq(podcnt-1)
+        m.d.rawclk += pod_done.eq(podcnt == 0)
+
+        # Generating sync2x (200Mhz) and init (25Mhz) from extclk
+        cd_sync2x = ClockDomain("sync2x", local=False)
+        cd_sync2x_unbuf = ClockDomain("sync2x_unbuf",
+                                      local=False, reset_less=True)
+        cd_init = ClockDomain("init", local=False)
+        cd_sync = ClockDomain("sync", local=False)
+        cd_dramsync = ClockDomain("dramsync", local=False)
+
+        # create PLL clocks
+        pll.set_clkin_freq(platform.default_clk_frequency)
+        pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq)
+        pll.create_clkout(ClockSignal("init"), 25e6)
+        m.submodules += Instance("ECLKSYNCB",
+                i_ECLKI = ClockSignal("sync2x_unbuf"),
+                i_STOP = 0,
+                o_ECLKO = ClockSignal("sync2x"))
+        m.domains += cd_sync2x_unbuf
+        m.domains += cd_sync2x
+        m.domains += cd_init
+        m.domains += cd_sync
+        m.domains += cd_dramsync
+        # NB: reset_ok is high while NOT ok (PLL unlocked or delay pending)
+        reset_ok = Signal(reset_less=True)
+        m.d.comb += reset_ok.eq(~pll.locked|~pod_done)
+        m.d.comb += ResetSignal("init").eq(reset_ok)
+        m.d.comb += ResetSignal("sync").eq(reset_ok)
+        m.d.comb += ResetSignal("dramsync").eq(reset_ok)
+
+        # Generating sync (100Mhz) from sync2x
+        m.submodules += Instance("CLKDIVF",
+            p_DIV="2.0",
+            i_ALIGNWD=0,
+            i_CLKI=ClockSignal("sync2x"),
+            i_RST=0,
+            o_CDIVX=ClockSignal("sync"))
+
+        # temporarily set dram sync clock exactly equal to main sync
+        m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
+
+        return m
+
diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py
new file mode 100644
index 0000000..3254115
--- /dev/null
+++ b/examples/headless-versa-85.py
@@ -0,0 +1,84 @@
+# This file is Copyright (c) 2020 LambdaConcept
+# This file is Copyright (c) 2022 Raptor Engineering, LLC
+
+from nmigen import *
+from nmigen.lib.cdc import ResetSynchronizer
+from nmigen_soc import wishbone, memory
+
+from lambdasoc.cpu.minerva import MinervaCPU
+from lambdasoc.periph.intc import GenericInterruptController
+from lambdasoc.periph.serial import AsyncSerialPeripheral
+from lambdasoc.periph.sram import SRAMPeripheral
+from lambdasoc.periph.timer import TimerPeripheral
+from lambdasoc.periph import Peripheral
+from lambdasoc.soc.base import SoC
+
+from gram.core import gramCore
+from gram.phy.ecp5ddrphy import ECP5DDRPHY
+from gram.modules import MT41K64M16
+from gram.frontend.wishbone import gramWishbone
+
+from nmigen_boards.versa_ecp5 import VersaECP5Platform85
+from ecp5_crg import ECP5CRG
+from uartbridge import UARTBridge
+from crg import *  # NOTE(review): may shadow ECP5CRG imported above - confirm
+
+class DDR3SoC(SoC, Elaboratable):
+    """DDR3 test SoC: a UART bridge masters a decoder holding PHY, core and DRAM."""
+    def __init__(self, *,
+                 ddrphy_addr, dramcore_addr,
+                 ddr_addr, platform=None):
+        # `platform` used to be read from a module-level global set by the
+        # __main__ block; keep that as the fallback for old call sites.
+        if platform is None:
+            platform = globals()["platform"]
+        self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8,
+                                         features={"cti", "bte"})
+
+        self.crg = ECP5CRG()
+        self.ub = UARTBridge(divisor=868, pins=platform.request("uart", 0))
+
+        ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"},
+            xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs":4, "reset":4})
+        self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins))
+        self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
+
+        ddrmodule = MT41K64M16(platform.default_clk_frequency, "1:2")
+
+        self.dramcore = DomainRenamer("dramsync")(gramCore(
+            phy=self.ddrphy,
+            geom_settings=ddrmodule.geom_settings,
+            timing_settings=ddrmodule.timing_settings,
+            clk_freq=platform.default_clk_frequency))
+        self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
+
+        self.drambone = DomainRenamer("dramsync")(gramWishbone(self.dramcore))
+        self._decoder.add(self.drambone.bus, addr=ddr_addr)
+
+        self.memory_map = self._decoder.bus.memory_map
+        self.clk_freq = platform.default_clk_frequency
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.sysclk = self.crg
+        m.submodules.ub = self.ub
+        m.submodules.decoder = self._decoder
+        m.submodules.ddrphy = self.ddrphy
+        m.submodules.dramcore = self.dramcore
+        m.submodules.drambone = self.drambone
+
+        m.d.comb += [
+            self.ub.bus.connect(self._decoder.bus),
+        ]
+
+        return m
+
+
+if __name__ == "__main__":
+    platform = VersaECP5Platform85()
+
+    soc = DDR3SoC(ddrphy_addr=0x00008000, dramcore_addr=0x00009000,
+                  ddr_addr=0x10000000, platform=platform)
+
+    soc.build(do_build=True)
+    platform.build(soc, do_program=True)
diff --git a/examples/headless/main.c b/examples/headless/main.c index cfcd36e..bc3e98a 100644 --- a/examples/headless/main.c +++ b/examples/headless/main.c @@ -31,6 +31,8 @@ uint32_t gram_read(struct gramCtx *ctx, void *addr) { fprintf(stderr, "gram_read error (read bytes length mismatch: %d != %d)\n", received, sizeof(reply)); } + //printf("gram_read: 0x%08x: 0x%08x\n", addr, ntohl(reply)); + return ntohl(reply); } @@ -41,6 +43,8 @@ int gram_write(struct gramCtx *ctx, void *addr, uint32_t value) { *(uint32_t*)(commands+2) = htonl((uint32_t)addr >> 2); *(uint32_t*)(commands+6) = htonl(value); + //printf("gram_write: 0x%08x: 0x%08x\n", addr, value); + sent = 
write(*(int*)(ctx->user_data), commands, sizeof(commands)); if (sent != sizeof(commands)) { fprintf(stderr, "gram_write error (sent bytes length mismatch)\n"); @@ -100,8 +104,22 @@ int main(int argc, char *argv[]) { uint32_t pattern[kPatternSize]; const int kDumpWidth = 8; size_t i; + int res; + uint32_t tmp; int delay, miss = 0; + uint32_t ddr_base = 0x10000000; + +#if 1 + struct gramProfile profile = { + .mode_registers = { + 0x2708, 0x2054, 0x0512, 0x0000 + }, + .rdly_p0 = 2, + .rdly_p1 = 2, + }; +#endif +#if 0 struct gramProfile profile = { .mode_registers = { 0x320, 0x6, 0x200, 0x0 @@ -109,6 +127,8 @@ int main(int argc, char *argv[]) { .rdly_p0 = 2, .rdly_p1 = 2, }; +#endif + struct gramProfile profile2; if (argc < 3) { fprintf(stderr, "Usage: %s port baudrate\n", argv[0]); @@ -126,9 +146,57 @@ int main(int argc, char *argv[]) { ctx.user_data = &serial_port; printf("gram init... "); - gram_init(&ctx, &profile, (void*)0x10000000, (void*)0x00009000, (void*)0x00008000); + gram_init(&ctx, &profile, (void*)ddr_base, (void*)0x00009000, (void*)0x00008000); printf("done\n"); + printf("Rdly\np0: "); + for (size_t i = 0; i < 8; i++) { + profile2.rdly_p0 = i; + gram_load_calibration(&ctx, &profile2); + gram_reset_burstdet(&ctx); + for (size_t j = 0; j < 128; j++) { + tmp = gram_read(&ctx, ddr_base+4*j); + } + if (gram_read_burstdet(&ctx, 0)) { + printf("1"); + } else { + printf("0"); + } + fflush(stdout); + } + printf("\n"); + + printf("Rdly\np1: "); + for (size_t i = 0; i < 8; i++) { + profile2.rdly_p1 = i; + gram_load_calibration(&ctx, &profile2); + gram_reset_burstdet(&ctx); + for (size_t j = 0; j < 128; j++) { + tmp = gram_read(&ctx, ddr_base+4*j); + } + if (gram_read_burstdet(&ctx, 1)) { + printf("1"); + } else { + printf("0"); + } + fflush(stdout); + } + printf("\n"); + + printf("Auto calibrating... 
"); + res = gram_generate_calibration(&ctx, &profile2); + if (res != GRAM_ERR_NONE) { + printf("failed\n"); + gram_load_calibration(&ctx, &profile); + } else { + gram_load_calibration(&ctx, &profile2); + } + printf("done\n"); + + printf("Auto calibration profile:\n"); + printf("\tp0 rdly: %d\n", profile2.rdly_p0); + printf("\tp1 rdly: %d\n", profile2.rdly_p1); + gram_reset_burstdet(&ctx); srand(time(NULL)); @@ -137,7 +205,6 @@ int main(int argc, char *argv[]) { } printf("memtest... \n"); - uint32_t ddr_base = 0x10000000; printf("Writing data sequence..."); for (i = 0; i < kPatternSize; i++) { diff --git a/libgram/Makefile b/libgram/Makefile index 6093828..4cb7a19 100644 --- a/libgram/Makefile +++ b/libgram/Makefile @@ -1,6 +1,6 @@ OBJS := src/init.o src/dfii.o src/calibration.o -TRIPLE := riscv64-unknown-elf- +TRIPLE := CC := $(TRIPLE)gcc AS := $(TRIPLE)as @@ -8,7 +8,7 @@ OBJCOPY := $(TRIPLE)objcopy AR := $(TRIPLE)ar LD := $(TRIPLE)ld -CFLAGS += -fvisibility=hidden -nostdlib -Os -Iinclude -std=c99 -Wall -Werror -pedantic +CFLAGS += -fvisibility=hidden -nostdlib -O0 -g -Iinclude -std=c99 -Wall -Wno-error -pedantic -DGRAM_RW_FUNC LDFLAGS += -nostdlib ifeq ($(TRIPLE),riscv64-unknown-elf-) diff --git a/libgram/src/calibration.c b/libgram/src/calibration.c index a77c44a..cb7b359 100644 --- a/libgram/src/calibration.c +++ b/libgram/src/calibration.c @@ -57,7 +57,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (gram_read_burstdet(ctx, 0)) { @@ -74,7 +78,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (gram_read_burstdet(ctx, 1)) { @@ -92,7 +100,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct 
gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (!gram_read_burstdet(ctx, 0)) { @@ -109,7 +121,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (!gram_read_burstdet(ctx, 1)) { -- 2.30.2