lambdasoc/periph/hyperram.py

   1 # Basic Implementation of HyperRAM
   2 #
   3 # Copyright (c) 2019 Antti Lukats <antti.lukats@gmail.com>
   4 # Copyright (c) 2019 Florent Kermarrec <florent@enjoy-digital.fr>
   5 # Copyright (c) 2021 gatecat <gatecat@ds0.me> [nmigen-soc port]
   6 # Copyright (C) 2022 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   7 #
   8 # Code from Lukats, Kermarrec and gatecat is Licensed BSD-2-Clause
   9 #
  10 # Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
  11 # under EU Grants 871528 and 957073, and Licensed under the LGPLv3+ License
  12
  13 """
  14 Usage example when wiring up an external pmod.
  15 (thanks to daveshah for this tip)
use platform.add_resources to first define the pins:

    from nmigen.resources.memory import HyperRAMResources
    hyperram_ios = HyperRAMResources(cs="B1", # or cs="C0 C1 C2 C3" for Quad
                                     dq="D0 D1 D2 D3 D4 D5 D6 D7",
                                     rwds="B2", rst_n="B3", ck_p="B4",
                                     attrs=Attrs(IOSTANDARD="LVCMOS33"))
    self.platform.add_resources(hyperram_ios)
  24     io = self.platform.request("hyperram")
  25
  26 and then declare the instance using those pins:
  27
  28     hyperram = HyperRAM(io=io, phy_kls=HyperRAMPHY,
  29                     latency=7) # Winbond W956D8MBYA
  30                                # latency=6 for Cypress S27KL0641DABHI020
  31
  32 this trick will work with the 1-IC HyperRAM PMOD by Piotr Esden, sold
  33 by 1bitsquared.  however for the *four* IC HyperRAM PMOD, *four* cs_n pins
  34 are needed. These are then used to select, in turn, each IC, sequentially:
  35     * Access to 0x00000-0xfffff will activate CS0n,
  36     * Access to 0x100000-0x1fffff will activate CS1n,
  37     * Access to 0x200000-0x2fffff will activate CS2n,
  38     * Access to 0x300000-0x3fffff will activate CS3n
  39
  40 TODO: interleave multiple HyperRAM cs_n's to give striped (like RAID)
  41 memory accesses behind one single Wishbone interface.
  42 TODO: investigate whether HyperBUS can do CSn-striping in hardware
  43 (it should do, but this will require configuration registers to be written)
  44 """
  45
  46
  47 from nmigen import (Elaboratable, Module, Signal, Record, Cat, Const)
  48 from nmigen.cli import rtlil
  49
  50 from nmigen_soc import wishbone
  51 from nmigen_soc.memory import MemoryMap
  52 from lambdasoc.periph import Peripheral
  53
  54
  55 # HyperRAM ASIC PHY -----------------------------------------------------------
  56
  57 class HyperRAMASICPhy(Elaboratable):
  58     def __init__(self, io):
  59         self.io = io
  60         self.ck = ck = Signal()
  61         self.cs  = cs = Signal(len(self.io.cs_n))
  62         self.rst_n = rst_n = Signal()
  63
  64         self.dq_o  = dq_o  = Signal(8)
  65         self.dq_i  = dq_i  = Signal(8)
  66         self.dq_oe = dq_oe = Signal()
  67
  68         self.rwds_o  = rwds_o  = Signal.like(self.io["rwds_o"])
  69         self.rwds_oe = rwds_oe = Signal()
  70
  71     def elaborate(self, platform):
  72         m = Module()
  73         comb = m.d.comb
  74         ck, cs, rst_n = self.ck, self.cs, self.rst_n
  75         dq_o, dq_i, dq_oe = self.dq_o, self.dq_i, self.dq_oe
  76         rwds_o, rwds_oe = self.rwds_o, self.rwds_oe
  77
  78         comb += [
  79             self.io["rwds_o"].eq(rwds_o),
  80             self.io["cs_n"].eq(~cs),
  81             self.io["csn_oe"].eq(0),
  82             self.io["ck_o"].eq(ck),
  83             self.io["ck_oe"].eq(0),
  84             self.io["rwds_oe"].eq(~rwds_oe),
  85             self.io["rst_n"].eq(rst_n),
  86         ]
  87
  88         for i in range(8):
  89             comb += [
  90                 self.io[f"d{i}_o"].eq(dq_o[i]),
  91                 self.io[f"d{i}_oe"].eq(~dq_oe),
  92                 dq_i[i].eq(self.io[f"d{i}_i"])
  93             ]
  94
  95         return m
  96
  97     def ports(self):
  98         return list(self.io.fields.values())
  99
 100
# HyperRAM pads class (PHY) which can be used for testing and simulation
# (without needing a platform instance). use as:
#   dut = HyperRAM(io=HyperRAMPads(), phy_kls=HyperRAMPHY)
 104
 105 class HyperRAMPads:
 106     def __init__(self, dw=8, n_cs=1):
 107         self.rst_n = Signal()
 108         self.ck  = Signal()
 109         self.cs_n = Signal(n_cs)
 110         self.dq   = Record([("oe", 1), ("o", dw),     ("i", dw)])
 111         self.rwds = Record([("oe", 1), ("o", dw//8),  ("i", dw//8)])
 112         self.dq.o.name = "dq_o"
 113         self.dq.i.name = "dq_i"
 114         self.dq.oe.name = "dq_oe"
 115         self.rwds.o.name = "rwds_o"
 116         self.rwds.i.name = "rwds_i"
 117         self.rwds.oe.name = "rwds_oe"
 118
 119     def ports(self):
 120         return [self.ck, self.cs_n, self.dq.o, self.dq.i, self.dq.oe,
 121                 self.rwds.o, self.rwds.oe, self.rst_n]
 122
 123
 124 class HyperRAMPHY(Elaboratable):
 125     def __init__(self, pads):
 126         self.pads = pads
 127         self.ck = pads.ck
 128         self.cs = Signal(len(self.pads.cs_n))
 129         self.rst_n = pads.rst_n
 130         self.dq_o = pads.dq.o
 131         self.dq_i = pads.dq.i
 132         self.dq_oe = pads.dq.oe
 133         self.rwds_o = pads.rwds.o
 134         self.rwds_oe = Signal()
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         m.d.comb += self.pads.cs_n.eq(self.cs)
 139         m.d.comb += self.pads.rwds.oe.eq(self.rwds_oe)
 140         return m
 141
 142     def ports(self):
 143         return self.pads.ports()
 144
 145
 146 # HyperRAM --------------------------------------------------------------------
 147
 148 class HyperRAM(Peripheral, Elaboratable):
 149     """HyperRAM
 150
 151     Provides a very simple/minimal HyperRAM core that should work with all
 152     FPGA/HyperRam chips:
 153     - FPGA vendor agnostic.
 154     - no setup/chip configuration (use default latency).
 155
 156     This core favors portability and ease of use over performance.
 157     Tested: Winbond W956D8MBYA latency=7
 158     Cypress S27KL0641DABHI020 requires latency=6
 159     """
 160     def __init__(self, *, io, phy_kls,
 161                           latency=6,
 162                           addr_width=23, # 8 GBytes, per IC
 163                           bus=None, features=frozenset()):
 164         super().__init__()
 165         self.n_cs = n_cs = len(io.cs_n)
 166         self.cs_bits = cs_bits = n_cs.bit_length()-1
 167         self.io = io
 168         self.phy = phy_kls(io)
 169         self.latency = latency
 170         # per IC, times n_cs
 171         addr_width += cs_bits
 172         self.bus = wishbone.Interface(addr_width=addr_width-2,
 173                                       data_width=32, granularity=8,
 174                                       features=features)
 175         self.size = 2**addr_width
 176         mmap = MemoryMap(addr_width=addr_width, data_width=8)
 177         mmap.add_resource(object(), name="hyperram", size=self.size)
 178         self.bus.memory_map = mmap
 179         # # #
 180
 181     def elaborate(self, platform):
 182         m = Module()
 183         m.submodules.phy = self.phy
 184         bus = self.bus
 185         cs_bits = self.cs_bits
 186         comb, sync = m.d.comb, m.d.sync
 187
 188         ck       = self.phy.ck
 189         clk_phase = Signal(2)
 190         ck_active = Signal()
 191         cs        = self.phy.cs
 192         ca        = Signal(48)
 193         ca_active = Signal()
 194         sr        = Signal(48)
 195         sr_new    = Signal(48)
 196
 197         dq_o = self.phy.dq_o
 198         dq_i = self.phy.dq_i
 199         dq_oe = self.phy.dq_oe
 200         dw = len(dq_o) # data width
 201
 202         rwds_o = self.phy.rwds_o
 203         rwds_oe = self.phy.rwds_oe
 204
 205         # chip&address selection: use the MSBs of the address for chip-select
 206         # (bus_adr_hi) by doing "1<<bus_adr_hi". this has to be captured
 207         # (cs_latch) and asserted as part of bus_latch.  therefore *before*
 208         # that happens (SEND-COMMAND-ADDRESS and WAIT-STATE) cs has to be
 209         # set to the "unlatched" version.
 210         bus_adr_lo = self.bus.adr[:-cs_bits]
 211         if cs_bits != 0:
 212             bus_adr_hi = self.bus.adr[-cs_bits:]
 213         else:
 214             bus_adr_hi = 0
 215
 216         # Clock Generation (sys_clk/4) -----------------------------------
 217         # this is a cheap-and-cheerful way to create phase-offsetted DDR:
 218         # simply divide the main clock into 4 phases.  it does mean that
 219         # the HyperRAM IC is being run at 1/4 rate. sigh.
 220         sync += clk_phase.eq(clk_phase + 1)
 221         with m.Switch(clk_phase):
 222             with m.Case(1):
 223                 sync += ck.eq(ck_active)
 224             with m.Case(3):
 225                 sync += ck.eq(0)
 226
 227         # Data Shift Register (for write and read) ------------------------
 228         dqi = Signal(dw)
 229         sync += dqi.eq(dq_i) # Sample on 90° and 270°
 230         with m.If(ca_active):
 231             comb += sr_new.eq(Cat(dqi[:8], sr[:-dw]))
 232         with m.Else():
 233             comb += sr_new.eq(Cat(dqi, sr[:-8]))
 234         with m.If(~clk_phase[0]):
 235             sync += sr.eq(sr_new) # Shift on 0° and 180°
 236
 237         # Data shift-out register ----------------------------------------
 238         comb += self.bus.dat_r.eq(sr_new), # To Wisbone
 239         with m.If(dq_oe):
 240             comb += dq_o.eq(sr[-dw:]), # To HyperRAM
 241         with m.If(dq_oe & ca_active):
 242             comb += dq_o.eq(sr[-8:]), # To HyperRAM, Only 8-bit during CMD/Addr.
 243
 244         # Command generation ----------------------------------------------
 245         ashift = {8:1, 16:0}[dw]
 246         la = 3-ashift
 247         comb += [
 248             ca[47].eq(~self.bus.we),     # R/W#
 249             ca[45].eq(1),                # Burst Type (Linear)
 250             ca[16:45].eq(bus_adr_lo[la:]),  # Row & Upper Column Address
 251             ca[ashift:3].eq(bus_adr_lo),    # Lower Column Address
 252         ]
 253
 254         # Latency count starts from the middle of the command (thus the -4).
 255         # In fixed latency mode (default), latency is 2 x Latency count.
 256         # We have 4 x sys_clk per RAM clock:
 257         latency_cycles = (self.latency * 2 * 4) - 4
 258
 259         # Bus Latch ----------------------------------------------------
 260         bus_adr   = Signal(32)
 261         bus_we    = Signal()
 262         bus_sel   = Signal(4)
 263         bus_latch = Signal()
 264         cs_latch  = Signal.like(cs)
 265         with m.If(bus_latch):
 266             with m.If(bus.we):
 267                 sync += sr.eq(Cat(Const(0, 16), bus.dat_w))
 268             sync += [ bus_we.eq(bus.we),
 269                       bus_sel.eq(bus.sel),
 270                       bus_adr.eq(bus_adr_lo),
 271                       cs_latch.eq(cs)
 272                     ]
 273
 274         # Sequencer -------------------------------------------------------
 275         cycles = Signal(8)
 276         first  = Signal()
 277         nfirst  = Signal() # not-first
 278         count_inc = Signal()
 279         dbg_cyc = Signal(8)
 280         comb += nfirst.eq(~first) # convenience
 281
 282         # when not idle run a cycles counter
 283         with m.If(count_inc):
 284             sync += dbg_cyc.eq(dbg_cyc+1)
 285         with m.Else():
 286             sync += dbg_cyc.eq(0)
 287
 288         # Main FSM
 289         with m.FSM() as fsm:
 290             comb += count_inc.eq(~fsm.ongoing("IDLE"))
 291             with m.State("IDLE"):
 292                 sync += first.eq(1)
 293                 with m.If(bus.cyc & bus.stb & (clk_phase == 0)):
 294                     sync += sr.eq(ca)
 295                     m.next = "SEND-COMMAND-ADDRESS"
 296                     sync += cycles.eq(0)
 297
 298             with m.State("SEND-COMMAND-ADDRESS"):
 299                 sync += cycles.eq(cycles+1)
 300                 comb += cs.eq(1<<bus_adr_hi) # Set CSn direct (not via latch)
 301                 comb += ck_active.eq(1) # Activate clock
 302                 comb += ca_active.eq(1) # Send Command on DQ.
 303                 comb += dq_oe.eq(1),    # Wait for 6*2 cycles...
 304                 with m.If(cycles == (6*2 - 1)):
 305                     m.next = "WAIT-LATENCY"
 306                     sync += cycles.eq(0)
 307
 308             with m.State("WAIT-LATENCY"):
 309                 sync += cycles.eq(cycles+1)
 310                 comb += cs.eq(1<<bus_adr_hi) # Set CSn directly (not via latch)
 311                 comb += ck_active.eq(1) # Activate clock
 312                 # Wait for Latency cycles...
 313                 with m.If(cycles == (latency_cycles - 1)):
 314                     comb += bus_latch.eq(1) # Latch Bus (and cs)
 315                     # Early Write Ack (to allow bursting).
 316                     comb += bus.ack.eq(bus.we)
 317                     m.next = "READ-WRITE-DATA0"
 318                     sync += cycles.eq(0)
 319
 320             # for-loop creates multple READ-WRITE-DATA states, to send/get
 321             # dw bits at a time.
 322             states = {8:4, 16:2}[dw]
 323             for n in range(states):
 324                 with m.State("READ-WRITE-DATA%d" % n):
 325                     sync += cycles.eq(cycles+1)
 326                     comb += cs.eq(cs_latch), # *now* set CSn from Latch
 327                     comb += ck_active.eq(1) # Activate clock
 328                     # Send Data on DQ/RWDS (for write).
 329                     with m.If(bus_we):
 330                         comb += dq_oe.eq(1)
 331                         comb += rwds_oe.eq(1)
 332                         for i in range(dw//8):
 333                             seli = ~bus_sel[4-1-n*dw//8-i]
 334                             comb += rwds_o[dw//8-1-i].eq(seli)
 335                     # Wait for 2 cycles (since HyperRAM's Clk = sys_clk/4).
 336                     with m.If(cycles == (2 - 1)):
 337                         # Set next default state (with rollover for bursts).
 338                         m.next = "READ-WRITE-DATA%d" % ((n + 1) % states)
 339                         sync += cycles.eq(0)
 340                         # On last state, see if we can continue the burst
 341                         # or if we should end it.
 342                         if n == states - 1:
 343                             sync += first.eq(0)
 344                             # Continue burst when consecutive access ready.
 345                             with m.If(bus.stb & bus.cyc &
 346                                       (bus.we == bus_we) &
 347                                       (bus_adr_lo == (bus_adr + 1)) &
 348                                       ((1<<bus_adr_hi) == cs_latch)):
 349                                 comb += bus_latch.eq(1), # Latch Bus (and cs)
 350                                 # Early Write Ack (to allow bursting).
 351                                 comb += bus.ack.eq(bus.we)
 352                             # Else end the burst.
 353                             with m.Elif(bus_we | nfirst):
 354                                 m.next = "IDLE"
 355                                 sync += cycles.eq(0)   # reset to start
 356                                 sync += cs_latch.eq(0) # helps debugging
 357                         # Read Ack (when dat_r ready).
 358                         if n == 0:
 359                             comb += bus.ack.eq(nfirst & ~bus_we)
 360
 361         return m
 362
 363     def ports(self):
 364         return self.phy.ports() + list(self.bus.fields.values())
 365
 366
 367 if __name__ == '__main__':
 368     layout=[('rwds_o', 1), ('rwds_oe', 1),
 369             ('cs_n', 1), ('csn_oe', 1),
 370             ('ck_o', 1), ('ck_oe', 1),
 371             ('rst_n', 1)]
 372     for i in range(8):
 373         layout += [('d%d_o' % i, 1), ('d%d_oe' % i, 1), ('d%d_i' % i, 1)]
 374     io = Record(layout=layout)
 375     dut = HyperRAM(io=io, phy_kls=HyperRAMASICPhy)
 376     vl = rtlil.convert(dut, ports=dut.ports())
 377     with open("test_hyperram.il", "w") as f:
 378         f.write(vl)
 379