Wire up missing CRG / DDR3 clock control / reset signals
[gram.git] / gram / phy / ecp5ddrphy.py
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
4 # License: BSD
5
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
7 # DDR3: 800 MT/s
8
9 import math
10
11 from nmigen import *
12 from nmigen.hdl.ast import Rose
13 from nmigen.lib.cdc import FFSynchronizer
14 from nmigen.utils import log2_int
15
16 from lambdasoc.periph import Peripheral
17
18 from gram.common import *
19 from gram.phy.dfi import Interface
20 from gram.compat import Timeline
21
22 __all__ = ["ECP5DDRPHY"]
23
24
class ECP5DDRPHYInit(Elaboratable):
    """Start-up sequencer for the DDRDLLA, ECLK and DQSBUFM primitives.

    A DDRDLLA primitive is instantiated here. Each time its lock output
    (synchronized into the "init" domain) rises, a fixed timeline of
    freeze / stop / reset / pause / update pulses is played in that domain.

    Attributes
    ----------
    pause : Signal(), out
        Asserted by the timeline around the DDRDLLA update step.
    stop : Signal(), out
        Asserted by the timeline while the ECLK domain is to be stopped.
    delay : Signal(), out
        Connected to the DDRDEL output of the DDRDLLA.
    reset : Signal(), out
        Asserted by the timeline while the ECLK domain is to be reset.
    """

    def __init__(self):
        self.pause = Signal()
        self.stop = Signal()
        self.delay = Signal()
        self.reset = Signal()

    def elaborate(self, platform):
        m = Module()

        # Internal controls driven only by the timeline below.
        update = Signal()
        freeze = Signal()

        # DDRDLLA instance -------------------------------------------------------------------------
        _lock = Signal()   # raw LOCK output of the DDRDLLA
        lock = Signal()    # LOCK synchronized into the "init" domain
        lock_d = Signal()  # previous-cycle value of `lock`, for edge detection
        m.submodules += Instance("DDRDLLA",
            i_CLK=ClockSignal("sync2x"),
            i_RST=ResetSignal("init"),
            i_UDDCNTLN=~update,  # port receives the inverted update request
            i_FREEZE=freeze,
            o_DDRDEL=self.delay,
            o_LOCK=_lock)
        m.submodules += FFSynchronizer(_lock, lock, o_domain="init")
        m.d.init += lock_d.eq(lock)

        # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
        t = 8  # spacing between two consecutive steps, in cycles
        sequence = [
            freeze.eq(1),      # Freeze DDRDLLA
            self.stop.eq(1),   # Stop ECLK domain
            self.reset.eq(1),  # Reset ECLK domain
            self.reset.eq(0),  # Release ECLK domain reset
            self.stop.eq(0),   # Release ECLK domain stop
            freeze.eq(0),      # Release DDRDLLA freeze
            self.pause.eq(1),  # Pause DQSBUFM
            update.eq(1),      # Update DDRDLLA
            update.eq(0),      # Release DDRDLLA update
            self.pause.eq(0),  # Release DQSBUFM pause
        ]
        # Step number n (counting from 1) is scheduled at n*t cycles.
        tl = Timeline([(n*t, [step]) for n, step in enumerate(sequence, start=1)])

        # Trigger timeline on lock rising edge
        lock_rising = lock & ~lock_d
        m.d.comb += tl.trigger.eq(lock_rising)
        m.submodules += DomainRenamer("init")(tl)

        return m
70
71
class _DQSBUFMSettingManager(Elaboratable):
    """DQSBUFM setting manager.

    Updating the read delay of a DQSBUFM primitive has to be wrapped in a
    short pause sequence. This elaboratable produces that sequence every
    time the associated CSR is written.

    Parameters
    ----------
    rdly_csr : CSR
        CSR holding the rdly value. Its write strobe (``w_stb``) starts the
        sequence and its write data (``w_data``) is copied to ``readclksel``.

    Attributes
    ----------
    pause : Signal(), out
        Pause signal for DQSBUFM; high from the cycle after the CSR write
        until the new ``readclksel`` value has been registered.
    readclksel : Signal(3), out
        Readclksel signal for DQSBUFM.
    """

    def __init__(self, rdly_csr):
        self.rdly_csr = rdly_csr

        self.pause = Signal()
        self.readclksel = Signal(3)

    def elaborate(self, platform):
        m = Module()

        csr = self.rdly_csr

        with m.FSM():
            # Wait for a write to the CSR, then raise pause.
            with m.State("Idle"):
                with m.If(csr.w_stb):
                    m.next = "RdlyUpdateRequested"
                    m.d.sync += self.pause.eq(1)

            # With pause raised, register the new read clock selection.
            with m.State("RdlyUpdateRequested"):
                m.next = "ResetPause"
                m.d.sync += self.readclksel.eq(csr.w_data)

            # Drop pause again and go back to waiting.
            with m.State("ResetPause"):
                m.next = "Idle"
                m.d.sync += self.pause.eq(0)

        return m
115
116
class ECP5DDRPHY(Peripheral, Elaboratable):
    """1:2 frequency-ratio DDR3 PHY for Lattice's ECP5.

    Parameters
    ----------
    pads
        DDR3 pad resource. It must expose ``dq.io``, ``dqs.p``, ``dm.o``,
        ``clk``, ``a``, ``ba``, ``ras``, ``cas``, ``we``, ``clk_en`` and
        ``odt``; ``rst`` and ``cs`` are driven when present. The clock,
        address, bank and control pins (other than ``rst``) are accessed
        through ``o0``..``o3``/``o_clk``/``o_fclk`` and therefore have to be
        requested with ``xdr=4``.
    sys_clk_freq : float
        Frequency of the "sync" domain in Hz. The DRAM clock period used for
        the timing computations is ``1/(2*sys_clk_freq)``.

    Attributes
    ----------
    init : ECP5DDRPHYInit
        DDRDLLA / ECLK / DQSBUFM start-up sequencer.
    dfi : Interface
        DFI interface, 2 phases (``settings.nphases``), ``4*databits`` wide.
    settings : PhySettings
        Latencies and phases derived from ``sys_clk_freq``.
    bus
        Bus interface of the CSR bridge.
    burstdet, rdly, bitslip
        CSRs. ``rdly`` is a list with one 3-bit CSR per DQS group (always at
        least two, ``rdly_p0`` and ``rdly_p1``).

    Raises
    ------
    ValueError
        If the DQ bus width is not a multiple of 8.
    """

    def __init__(self, pads, sys_clk_freq=100e6):
        super().__init__(name="phy")

        self.pads = pads
        self._sys_clk_freq = sys_clk_freq
        self.init = ECP5DDRPHYInit()

        databits = len(self.pads.dq.io)
        if databits % 8 != 0:
            raise ValueError("DQ pads should come in a multiple of 8")

        # CSR
        bank = self.csr_bank()

        self.burstdet = bank.csr(databits//8, "rw")

        # One read-delay CSR per DQS group, because elaborate() indexes this
        # list by group. At least two are always created so that the CSR map
        # of 8- and 16-bit buses is unchanged.
        self.rdly = [bank.csr(3, "rw", name=f"rdly_p{i}")
                     for i in range(max(2, databits//8))]
        self.bitslip = bank.csr(3, "rw")  # phase-delay on read

        self._bridge = self.bridge(data_width=32, granularity=8, alignment=2)
        self.bus = self._bridge.bus

        addressbits = len(self.pads.a.o0)
        bankbits = len(self.pads.ba.o0)

        # The chip-select resource is called "cs" by the pads that
        # elaborate() drives; "cs_n" is also accepted. The DFI cs_n width has
        # to match the pad width since elaborate() indexes it per pad bit.
        nranks = 1
        for cs_name in ("cs_n", "cs"):
            cs_pad = getattr(self.pads, cs_name, None)
            if cs_pad is not None and hasattr(cs_pad, "o0"):
                nranks = len(cs_pad.o0)
                break

        self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4,
                             name="ecp5phy")

        # PHY settings -----------------------------------------------------------------------------
        tck = 1/(2*self._sys_clk_freq)
        nphases = 2
        cl, cwl = get_cl_cw("DDR3", tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)
        rdphase = get_sys_phase(nphases, cl_sys_latency, cl)
        wrphase = get_sys_phase(nphases, cwl_sys_latency, cwl)
        self.settings = PhySettings(
            phytype="ECP5DDRPHY",
            memtype="DDR3",
            databits=databits,
            dfi_databits=4*databits,
            nranks=nranks,
            nphases=nphases,
            rdphase=rdphase,
            wrphase=wrphase,
            rdcmdphase=(rdphase - 1) % nphases,
            wrcmdphase=(wrphase - 1) % nphases,
            cl=cl,
            cwl=cwl,
            read_latency=cl_sys_latency + 10,
            write_latency=cwl_sys_latency
        )

    def elaborate(self, platform):
        m = Module()

        m.submodules.bridge = self._bridge

        tck = 1/(2*self._sys_clk_freq)
        nphases = 2
        databits = len(self.pads.dq.io)

        # One sticky burst-detect bit per DQS group, readable through the CSR.
        burstdet_reg = Signal(databits//8, reset_less=True)
        m.d.comb += self.burstdet.r_data.eq(burstdet_reg)

        # Burstdet clear: any write to the CSR clears all bits. The per-group
        # "set" statements are added later and therefore take priority.
        with m.If(self.burstdet.w_stb):
            m.d.sync += burstdet_reg.eq(0)

        # Init -------------------------------------------------------------------------------------
        m.submodules.init = init = self.init

        # Parameters -------------------------------------------------------------------------------
        cl, cwl = get_cl_cw("DDR3", tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)

        # DFI Interface ----------------------------------------------------------------------------
        dfi = self.dfi

        # Selects which half of the 8-beat write burst is sent (see the
        # write control path at the end of this method).
        bl8_chunk = Signal()

        # Clock --------------------------------------------------------------------------------
        m.d.comb += [
            self.pads.clk.o_clk.eq(ClockSignal("dramsync")),
            self.pads.clk.o_fclk.eq(ClockSignal("sync2x")),
        ]
        for i in range(len(self.pads.clk.o0)):
            # Constant 0,1,0,1 pattern on the four output slots.
            m.d.comb += [
                self.pads.clk.o0[i].eq(0),
                self.pads.clk.o1[i].eq(1),
                self.pads.clk.o2[i].eq(0),
                self.pads.clk.o3[i].eq(1),
            ]

        # Addresses and Commands ---------------------------------------------------------------
        m.d.comb += [
            self.pads.a.o_clk.eq(ClockSignal("dramsync")),
            self.pads.a.o_fclk.eq(ClockSignal("sync2x")),
            self.pads.ba.o_clk.eq(ClockSignal("dramsync")),
            self.pads.ba.o_fclk.eq(ClockSignal("sync2x")),
        ]
        # Each DFI phase is presented on two consecutive output slots.
        for i in range(len(self.pads.a.o0)):
            m.d.comb += [
                self.pads.a.o0[i].eq(dfi.phases[0].address[i]),
                self.pads.a.o1[i].eq(dfi.phases[0].address[i]),
                self.pads.a.o2[i].eq(dfi.phases[1].address[i]),
                self.pads.a.o3[i].eq(dfi.phases[1].address[i]),
            ]
        for i in range(len(self.pads.ba.o0)):
            m.d.comb += [
                self.pads.ba.o0[i].eq(dfi.phases[0].bank[i]),
                self.pads.ba.o1[i].eq(dfi.phases[0].bank[i]),
                self.pads.ba.o2[i].eq(dfi.phases[1].bank[i]),
                self.pads.ba.o3[i].eq(dfi.phases[1].bank[i]),
            ]

        # Control pins: all of these have to be declared "xdr 4" when
        # requesting the resource:
        # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
        controls = ["ras", "cas", "we", "clk_en", "odt"]
        if hasattr(self.pads, "rst"):  # this gets renamed later to match dfi
            controls.append("rst")
        if hasattr(self.pads, "cs"):
            controls.append("cs")

        # sigh, convention in nmigen_boards is "rst" but in
        # dfi.Interface it is "reset" (likewise "cs" vs "cs_n")
        pads2dfi = {'rst': 'reset', 'cs': 'cs_n'}
        for pad_name in controls:
            pad = getattr(self.pads, pad_name)
            dfi_name = pads2dfi.get(pad_name, pad_name)  # remap if exists

            if dfi_name == "reset":
                # Reset is clocked from "sync" only and driven from phase 0.
                m.d.comb += [
                    pad.o_clk.eq(ClockSignal("sync")),
                ]
                for i in range(len(pad.o)):
                    m.d.comb += [
                        pad.o[i].eq(getattr(dfi.phases[0], dfi_name)[i]),
                    ]
                continue

            m.d.comb += [
                pad.o_clk.eq(ClockSignal("dramsync")),
                pad.o_fclk.eq(ClockSignal("sync2x")),
            ]
            # cs_n can't be directly connected to cs without being inverted first...
            inverted = dfi_name == "cs_n"
            for i in range(len(pad.o0)):
                phase0 = getattr(dfi.phases[0], dfi_name)[i]
                phase1 = getattr(dfi.phases[1], dfi_name)[i]
                if inverted:
                    phase0 = ~phase0
                    phase1 = ~phase1
                m.d.comb += [
                    pad.o0[i].eq(phase0),
                    pad.o1[i].eq(phase0),
                    pad.o2[i].eq(phase1),
                    pad.o3[i].eq(phase1),
                ]

        # DQ ---------------------------------------------------------------------------------------
        dq_oe = Signal()
        dqs_re = Signal()
        dqs_oe = Signal()
        dqs_postamble = Signal()
        dqs_preamble = Signal()
        for i in range(databits//8):
            # DQSBUFM
            dqs_i = Signal()
            dqsr90 = Signal()
            dqsw270 = Signal()
            dqsw = Signal()
            rdpntr = Signal(3)
            wrpntr = Signal(3)
            burstdet = Signal()
            datavalid = Signal()
            datavalid_prev = Signal()
            m.d.sync += datavalid_prev.eq(datavalid)

            # Sequences PAUSE/READCLKSEL updates requested through the CSR.
            dqsbufm_manager = _DQSBUFMSettingManager(self.rdly[i])
            setattr(m.submodules, f"dqsbufm_manager{i}", dqsbufm_manager)

            m.submodules += Instance("DQSBUFM",
                p_DQS_LI_DEL_ADJ="MINUS",
                p_DQS_LI_DEL_VAL=1,
                p_DQS_LO_DEL_ADJ="MINUS",
                p_DQS_LO_DEL_VAL=4,

                # Delay
                i_DYNDELAY0=0,
                i_DYNDELAY1=0,
                i_DYNDELAY2=0,
                i_DYNDELAY3=0,
                i_DYNDELAY4=0,
                i_DYNDELAY5=0,
                i_DYNDELAY6=0,
                i_DYNDELAY7=0,

                # Clocks / Reset
                i_SCLK=ClockSignal("sync"),
                i_ECLK=ClockSignal("sync2x"),
                i_RST=ResetSignal("dramsync"),
                i_DDRDEL=init.delay,
                i_PAUSE=init.pause | dqsbufm_manager.pause,

                # Control
                # Assert LOADNs to use DDRDEL control
                i_RDLOADN=0,
                i_RDMOVE=0,
                i_RDDIRECTION=1,
                i_WRLOADN=0,
                i_WRMOVE=0,
                i_WRDIRECTION=1,

                # Reads (generate shifted DQS clock for reads)
                i_READ0=dqs_re,
                i_READ1=dqs_re,
                i_READCLKSEL0=dqsbufm_manager.readclksel[0],
                i_READCLKSEL1=dqsbufm_manager.readclksel[1],
                i_READCLKSEL2=dqsbufm_manager.readclksel[2],
                i_DQSI=dqs_i,
                o_DQSR90=dqsr90,
                o_RDPNTR0=rdpntr[0],
                o_RDPNTR1=rdpntr[1],
                o_RDPNTR2=rdpntr[2],
                o_WRPNTR0=wrpntr[0],
                o_WRPNTR1=wrpntr[1],
                o_WRPNTR2=wrpntr[2],
                o_BURSTDET=burstdet,
                o_DATAVALID=datavalid,

                # Writes (generate shifted ECLK clock for writes)
                o_DQSW270=dqsw270,
                o_DQSW=dqsw)

            # Latch a rising edge of BURSTDET into this group's sticky bit.
            with m.If(Rose(burstdet)):
                m.d.sync += burstdet_reg[i].eq(1)

            # DQS and DM ---------------------------------------------------------------------------
            dm_o_data = Signal(8)
            dm_o_data_d = Signal(8, reset_less=True)
            dm_o_data_muxed = Signal(4, reset_less=True)
            # Gather this group's mask bit for the 8 beats of a burst:
            # 4 beats from DFI phase 0 followed by 4 beats from phase 1.
            m.d.comb += dm_o_data.eq(Cat(
                dfi.phases[0].wrdata_mask[0*databits//8+i],
                dfi.phases[0].wrdata_mask[1*databits//8+i],
                dfi.phases[0].wrdata_mask[2*databits//8+i],
                dfi.phases[0].wrdata_mask[3*databits//8+i],

                dfi.phases[1].wrdata_mask[0*databits//8+i],
                dfi.phases[1].wrdata_mask[1*databits//8+i],
                dfi.phases[1].wrdata_mask[2*databits//8+i],
                dfi.phases[1].wrdata_mask[3*databits//8+i]),
            )
            m.d.sync += dm_o_data_d.eq(dm_o_data)

            # First half of the burst comes straight from the DFI, second
            # half from the copy registered one cycle earlier.
            with m.If(bl8_chunk):
                m.d.sync += dm_o_data_muxed.eq(dm_o_data_d[4:])
            with m.Else():
                m.d.sync += dm_o_data_muxed.eq(dm_o_data[:4])

            m.submodules += Instance("ODDRX2DQA",
                i_RST=ResetSignal("dramsync"),
                i_ECLK=ClockSignal("sync2x"),
                i_SCLK=ClockSignal("dramsync"),
                i_DQSW270=dqsw270,
                i_D0=dm_o_data_muxed[0],
                i_D1=dm_o_data_muxed[1],
                i_D2=dm_o_data_muxed[2],
                i_D3=dm_o_data_muxed[3],
                o_Q=self.pads.dm.o[i])

            dqs = Signal()
            dqs_oe_n = Signal()
            m.submodules += [
                Instance("ODDRX2DQSB",
                    i_RST=ResetSignal("dramsync"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_D0=0,
                    i_D1=1,
                    i_D2=0,
                    i_D3=1,
                    o_Q=dqs),
                Instance("TSHX2DQSA",
                    i_RST=ResetSignal("dramsync"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_T0=~(dqs_oe | dqs_postamble),
                    i_T1=~(dqs_oe | dqs_preamble),
                    o_Q=dqs_oe_n),
                Instance("BB",
                    i_I=dqs,
                    i_T=dqs_oe_n,
                    o_O=dqs_i,
                    io_B=self.pads.dqs.p[i]),
            ]

            # The 8 DQ bits belonging to DQS group i.
            for j in range(8*i, 8*(i+1)):
                dq_o = Signal()
                dq_i = Signal()
                dq_oe_n = Signal()
                dq_i_delayed = Signal()
                dq_i_data = Signal(4)
                dq_o_data = Signal(8)
                dq_o_data_d = Signal(8, reset_less=True)
                dq_o_data_muxed = Signal(4, reset_less=True)
                # Same beat ordering as for the data mask above.
                m.d.comb += dq_o_data.eq(Cat(
                    dfi.phases[0].wrdata[0*databits+j],
                    dfi.phases[0].wrdata[1*databits+j],
                    dfi.phases[0].wrdata[2*databits+j],
                    dfi.phases[0].wrdata[3*databits+j],
                    dfi.phases[1].wrdata[0*databits+j],
                    dfi.phases[1].wrdata[1*databits+j],
                    dfi.phases[1].wrdata[2*databits+j],
                    dfi.phases[1].wrdata[3*databits+j])
                )

                m.d.sync += dq_o_data_d.eq(dq_o_data)
                with m.If(bl8_chunk):
                    m.d.sync += dq_o_data_muxed.eq(dq_o_data_d[4:])
                with m.Else():
                    m.d.sync += dq_o_data_muxed.eq(dq_o_data[:4])

                m.submodules += [
                    Instance("ODDRX2DQA",
                        i_RST=ResetSignal("dramsync"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_D0=dq_o_data_muxed[0],
                        i_D1=dq_o_data_muxed[1],
                        i_D2=dq_o_data_muxed[2],
                        i_D3=dq_o_data_muxed[3],
                        o_Q=dq_o),
                    Instance("DELAYG",
                        p_DEL_MODE="DQS_ALIGNED_X2",
                        i_A=dq_i,
                        o_Z=dq_i_delayed),
                    Instance("IDDRX2DQA",
                        i_RST=ResetSignal("dramsync"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSR90=dqsr90,
                        i_RDPNTR0=rdpntr[0],
                        i_RDPNTR1=rdpntr[1],
                        i_RDPNTR2=rdpntr[2],
                        i_WRPNTR0=wrpntr[0],
                        i_WRPNTR1=wrpntr[1],
                        i_WRPNTR2=wrpntr[2],
                        i_D=dq_i_delayed,
                        o_Q0=dq_i_data[0],
                        o_Q1=dq_i_data[1],
                        o_Q2=dq_i_data[2],
                        o_Q3=dq_i_data[3]),
                    Instance("TSHX2DQA",
                        i_RST=ResetSignal("dramsync"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_T0=~dq_oe,
                        i_T1=~dq_oe,
                        o_Q=dq_oe_n),
                    Instance("BB",
                        i_I=dq_o,
                        i_T=dq_oe_n,
                        o_O=dq_i,
                        io_B=self.pads.dq.io[j])
                ]
                # First cycle with DATAVALID high fills DFI phase 0, any
                # further cycle with DATAVALID high fills phase 1.
                with m.If(~datavalid_prev & datavalid):
                    m.d.sync += [
                        dfi.phases[0].rddata[0*databits+j].eq(dq_i_data[0]),
                        dfi.phases[0].rddata[1*databits+j].eq(dq_i_data[1]),
                        dfi.phases[0].rddata[2*databits+j].eq(dq_i_data[2]),
                        dfi.phases[0].rddata[3*databits+j].eq(dq_i_data[3]),
                    ]
                with m.Elif(datavalid):
                    m.d.sync += [
                        dfi.phases[1].rddata[0*databits+j].eq(dq_i_data[0]),
                        dfi.phases[1].rddata[1*databits+j].eq(dq_i_data[1]),
                        dfi.phases[1].rddata[2*databits+j].eq(dq_i_data[2]),
                        dfi.phases[1].rddata[3*databits+j].eq(dq_i_data[3]),
                    ]

        # Read Control Path ------------------------------------------------------------------------
        # Creates a shift register of read commands coming from the DFI interface. This shift register
        # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
        # DFI interface that the read data is valid.
        #
        # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
        # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
        #
        # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
        # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
        rddata_en = Signal(self.settings.read_latency)
        rddata_en_last = Signal.like(rddata_en)
        m.d.comb += rddata_en.eq(Cat(dfi.phases[self.settings.rdphase].rddata_en, rddata_en_last))
        m.d.sync += rddata_en_last.eq(rddata_en)
        m.d.comb += dqs_re.eq(rddata_en[cl_sys_latency + 1] | rddata_en[cl_sys_latency + 2])

        # NOTE: `datavalid`/`datavalid_prev` here are the signals of the last
        # DQS group created by the loop above; a falling edge of DATAVALID on
        # that group flags the read data as valid on every phase.
        rddata_valid = Signal()
        m.d.sync += rddata_valid.eq(datavalid_prev & ~datavalid)
        for phase in dfi.phases:
            m.d.comb += phase.rddata_valid.eq(rddata_valid)

        # Write Control Path -----------------------------------------------------------------------
        # Creates a shift register of write commands coming from the DFI interface. This shift register
        # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
        # interface: The PHY is operating in halfrate mode (so provide 4 datas every sys_clk cycles:
        # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 datas (BL8) for best efficiency.
        # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
        # FIXME: understand +2
        wrdata_en = Signal(cwl_sys_latency + 4)
        wrdata_en_last = Signal.like(wrdata_en)
        m.d.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
        m.d.sync += wrdata_en_last.eq(wrdata_en)
        m.d.comb += dq_oe.eq(wrdata_en[cwl_sys_latency + 1] | wrdata_en[cwl_sys_latency + 2])
        m.d.comb += bl8_chunk.eq(wrdata_en[cwl_sys_latency + 1])
        m.d.comb += dqs_oe.eq(dq_oe)

        # Write DQS Postamble/Preamble Control Path ------------------------------------------------
        # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
        # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
        # 1 for Preamble, 2 for the Write and 1 for the Postamble.
        m.d.comb += dqs_preamble.eq(wrdata_en[cwl_sys_latency + 0] & ~wrdata_en[cwl_sys_latency + 1])
        m.d.comb += dqs_postamble.eq(wrdata_en[cwl_sys_latency + 3] & ~wrdata_en[cwl_sys_latency + 2])

        return m