gram.phy.ecp5ddrphy: Remove unused stream import
[gram.git] / gram / phy / ecp5ddrphy.py
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
4 # License: BSD
5
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
7 # DDR3: 800 MT/s
8
9 import math
10
11 from nmigen import *
12 from nmigen.lib.cdc import FFSynchronizer
13 from nmigen.utils import log2_int
14
15 from lambdasoc.periph import Peripheral
16
17 from gram.common import *
18 from gram.phy.dfi import Interface
19 from gram.compat import Timeline
20
21 __all__ = ["ECP5DDRPHY"]
22
23
class ECP5DDRPHYInit(Elaboratable):
    """DDRDLLA/DQSBUFM/ECLK initialization sequencer.

    Instantiates the DDRDLLA primitive and, on each rising edge of its
    synchronized lock output, triggers a fixed timeline that drives the
    freeze/stop/reset/pause/update controls in order.

    Attributes
    ----------
    pause : Signal(), out
        Asserted by the timeline around the DDRDLLA update.
    stop : Signal(), out
        Asserted by the timeline to stop the ECLK domain.
    delay : Signal(), out
        Mirrors the DDRDEL output of the DDRDLLA instance.
    reset : Signal(), out
        Asserted by the timeline to reset the ECLK domain.
    """
    def __init__(self):
        self.pause = Signal()
        self.stop = Signal()
        self.delay = Signal()
        self.reset = Signal()

    def elaborate(self, platform):
        m = Module()

        # Internal controls driven by the timeline, plus the lock-edge strobe.
        new_lock = Signal()
        update = Signal()
        freeze = Signal()

        # DDRDLLA instance -------------------------------------------------------------------------
        _lock = Signal()
        delay = Signal()
        ddrdlla = Instance("DDRDLLA",
            i_CLK=ClockSignal("sync2x"),
            i_RST=ResetSignal("init"),
            i_UDDCNTLN=~update,
            i_FREEZE=freeze,
            o_DDRDEL=delay,
            o_LOCK=_lock)
        m.submodules += ddrdlla

        # Bring the lock flag into the "init" domain and detect its rising edge.
        lock = Signal()
        lock_d = Signal()
        m.submodules += FFSynchronizer(_lock, lock, o_domain="init")
        m.d.init += lock_d.eq(lock)
        m.d.sync += new_lock.eq(lock & ~lock_d)

        # DDRDLLA/DDQBUFM/ECLK initialization sequence ---------------------------------------------
        # One action per slot; slot k (1-based) is scheduled at k*step cycles.
        step = 8  # in cycles
        actions = [
            freeze.eq(1),      # Freeze DDRDLLA
            self.stop.eq(1),   # Stop ECLK domain
            self.reset.eq(1),  # Reset ECLK domain
            self.reset.eq(0),  # Release ECLK domain reset
            self.stop.eq(0),   # Release ECLK domain stop
            freeze.eq(0),      # Release DDRDLLA freeze
            self.pause.eq(1),  # Pause DQSBUFM
            update.eq(1),      # Update DDRDLLA
            update.eq(0),      # Release DDRDLLA update
            self.pause.eq(0),  # Release DQSBUFM pause
        ]
        sequence = Timeline([
            (slot*step, [action]) for slot, action in enumerate(actions, start=1)
        ])
        m.submodules += sequence

        m.d.comb += [
            # Wait DDRDLLA Lock
            sequence.trigger.eq(new_lock),
            self.delay.eq(delay),
        ]

        return m
75
76
class _DQSBUFMSettingManager(Elaboratable):
    """DQSBUFM setting manager.

    The DQSBUFM primitive requires a very basic sequence when updating
    read delay or other parameters. This elaboratable generates this
    sequence from CSR events: assert pause, update readclksel, release
    pause.

    Parameters
    ----------
    rdly_csr : CSR
        CSR storing the rdly value. Only its ``w_stb`` and ``w_data``
        members are used.

    Attributes
    ----------
    pause : Signal(), out
        Pause signal for DQSBUFM.
    readclksel : Signal(3), out
        Readclksel signal for DQSBUFM.
    """
    def __init__(self, rdly_csr):
        self.rdly_csr = rdly_csr

        self.pause = Signal()
        self.readclksel = Signal(3)

    def elaborate(self, platform):
        m = Module()

        # CSR write data is only specified as valid while w_stb is asserted,
        # so capture it together with the strobe and apply the captured copy
        # one cycle later, once pause is already asserted.
        rdly_value = Signal.like(self.readclksel)

        with m.FSM():
            with m.State("Idle"):
                with m.If(self.rdly_csr.w_stb):
                    m.d.sync += [
                        rdly_value.eq(self.rdly_csr.w_data),
                        self.pause.eq(1),
                    ]
                    m.next = "RdlyUpdateRequested"

            with m.State("RdlyUpdateRequested"):
                # pause is high during this state; readclksel changes on the
                # same clock edge as in the previous implementation.
                m.d.sync += self.readclksel.eq(rdly_value)
                m.next = "ResetPause"

            with m.State("ResetPause"):
                m.d.sync += self.pause.eq(0)
                m.next = "Idle"

        return m
120
121
class ECP5DDRPHY(Peripheral, Elaboratable):
    """1:2 frequency-ratio DDR3 PHY for Lattice's ECP5.

    Parameters
    ----------
    pads : object
        DRAM pad bundle. The code reads ``dq.io``, ``dqs.p``, ``dm.o`` and,
        for ``clk``, ``a``, ``ba``, ``ras``, ``cas``, ``we``, ``clk_en``,
        ``odt`` (plus ``reset`` and ``cs`` when present), the members
        ``o_clk``, ``o_fclk`` and ``o0``..``o3``.
    sys_clk_freq : float
        System clock frequency in Hz; the DRAM clock period is derived as
        ``1/(2*sys_clk_freq)``.

    Attributes
    ----------
    burstdet : CSR
        One bit per byte lane; set when the lane's DQSBUFM asserts BURSTDET.
        The set bits are cleared on the cycle after a CSR write
        (a BURSTDET set in that same cycle takes priority).
    rdly : list of CSR
        One 3-bit read-delay CSR per byte lane (never fewer than two, so
        ``rdly_p0`` and ``rdly_p1`` always exist).
    bus
        Bus interface of the CSR bridge.
    dfi : Interface
        DFI interface towards the memory controller.
    settings : PhySettings
        PHY settings derived from the pads and ``sys_clk_freq``.

    Raises
    ------
    ValueError
        If the number of DQ pads is not a multiple of 8.
    """
    def __init__(self, pads, sys_clk_freq=100e6):
        super().__init__(name="phy")

        self.pads = pads
        self._sys_clk_freq = sys_clk_freq

        databits = len(self.pads.dq.io)
        if databits % 8 != 0:
            raise ValueError("DQ pads should come in a multiple of 8")

        # CSR
        bank = self.csr_bank()

        self.burstdet = bank.csr(databits//8, "rw")

        # elaborate() indexes self.rdly once per byte lane, so create one CSR
        # per lane. At least two are always created so the register map of
        # 8/16-bit configurations is unchanged.
        self.rdly = [
            bank.csr(3, "rw", name=f"rdly_p{lane}")
            for lane in range(max(2, databits//8))
        ]

        self._bridge = self.bridge(data_width=32, granularity=8, alignment=2)
        self.bus = self._bridge.bus

        addressbits = len(self.pads.a.o0)
        bankbits = len(self.pads.ba.o0)
        nranks = 1 if not hasattr(self.pads, "cs") else len(self.pads.cs.o0)
        self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4)

        # PHY settings -----------------------------------------------------------------------------
        tck = 1/(2*self._sys_clk_freq)
        nphases = 2
        cl, cwl = get_cl_cw("DDR3", tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)
        rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
        wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
        self.settings = PhySettings(
            phytype="ECP5DDRPHY",
            memtype="DDR3",
            databits=databits,
            dfi_databits=4*databits,
            nranks=nranks,
            nphases=nphases,
            rdphase=rdphase,
            wrphase=wrphase,
            rdcmdphase=rdcmdphase,
            wrcmdphase=wrcmdphase,
            cl=cl,
            cwl=cwl,
            read_latency=2 + cl_sys_latency + 2 + log2_int(4//nphases) + 4,
            write_latency=cwl_sys_latency
        )

    def elaborate(self, platform):
        m = Module()

        m.submodules.bridge = self._bridge

        tck = 1/(2*self._sys_clk_freq)
        nphases = 2
        databits = len(self.pads.dq.io)

        burstdet_reg = Signal(databits//8, reset_less=True)
        m.d.comb += self.burstdet.r_data.eq(burstdet_reg)

        # Burstdet clear
        # (the per-lane "set" statements are added later in this method and
        # therefore take priority over this clear)
        with m.If(self.burstdet.w_stb):
            m.d.sync += burstdet_reg.eq(0)

        # Init -------------------------------------------------------------------------------------
        m.submodules.init = init = ECP5DDRPHYInit()

        # Parameters -------------------------------------------------------------------------------
        cl, cwl = get_cl_cw("DDR3", tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)

        # DFI Interface ----------------------------------------------------------------------------
        dfi = self.dfi

        bl8_chunk = Signal()

        # Clock --------------------------------------------------------------------------------
        m.d.comb += [
            self.pads.clk.o_clk.eq(ClockSignal("dramsync")),
            self.pads.clk.o_fclk.eq(ClockSignal("sync2x")),
        ]
        for i in range(len(self.pads.clk.o0)):
            m.d.comb += [
                self.pads.clk.o0[i].eq(0),
                self.pads.clk.o1[i].eq(1),
                self.pads.clk.o2[i].eq(0),
                self.pads.clk.o3[i].eq(1),
            ]

        # Addresses and Commands ---------------------------------------------------------------
        # Each DFI phase is presented on two consecutive output slots
        # (phase 0 on o0/o1, phase 1 on o2/o3).
        m.d.comb += [
            self.pads.a.o_clk.eq(ClockSignal("dramsync")),
            self.pads.a.o_fclk.eq(ClockSignal("sync2x")),
            self.pads.ba.o_clk.eq(ClockSignal("dramsync")),
            self.pads.ba.o_fclk.eq(ClockSignal("sync2x")),
        ]
        for i in range(len(self.pads.a.o0)):
            m.d.comb += [
                self.pads.a.o0[i].eq(dfi.phases[0].address[i]),
                self.pads.a.o1[i].eq(dfi.phases[0].address[i]),
                self.pads.a.o2[i].eq(dfi.phases[1].address[i]),
                self.pads.a.o3[i].eq(dfi.phases[1].address[i]),
            ]
        for i in range(len(self.pads.ba.o0)):
            m.d.comb += [
                self.pads.ba.o0[i].eq(dfi.phases[0].bank[i]),
                self.pads.ba.o1[i].eq(dfi.phases[0].bank[i]),
                self.pads.ba.o2[i].eq(dfi.phases[1].bank[i]),
                self.pads.ba.o3[i].eq(dfi.phases[1].bank[i]),
            ]

        # Control pins
        controls = ["ras", "cas", "we", "clk_en", "odt"]
        if hasattr(self.pads, "reset"):
            controls.append("reset")
        if hasattr(self.pads, "cs"):
            controls.append("cs")
        for name in controls:
            pad = getattr(self.pads, name)
            phase0 = getattr(dfi.phases[0], name)
            phase1 = getattr(dfi.phases[1], name)
            m.d.comb += [
                pad.o_clk.eq(ClockSignal("dramsync")),
                pad.o_fclk.eq(ClockSignal("sync2x")),
            ]
            for i in range(len(pad.o0)):
                m.d.comb += [
                    pad.o0[i].eq(phase0[i]),
                    pad.o1[i].eq(phase0[i]),
                    pad.o2[i].eq(phase1[i]),
                    pad.o3[i].eq(phase1[i]),
                ]

        # DQ ---------------------------------------------------------------------------------------
        dq_oe = Signal()
        dqs_re = Signal()
        dqs_oe = Signal()
        dqs_postamble = Signal()
        dqs_preamble = Signal()
        for i in range(databits//8):
            # DQSBUFM
            dqs_i = Signal()
            dqsr90 = Signal()
            dqsw270 = Signal()
            dqsw = Signal()
            rdpntr = Signal(3)
            wrpntr = Signal(3)
            burstdet = Signal()
            datavalid = Signal()
            datavalid_prev = Signal()
            m.d.sync += datavalid_prev.eq(datavalid)

            dqsbufm_manager = _DQSBUFMSettingManager(self.rdly[i])
            setattr(m.submodules, f"dqsbufm_manager{i}", dqsbufm_manager)

            m.submodules += Instance("DQSBUFM",
                p_DQS_LI_DEL_ADJ="MINUS",
                p_DQS_LI_DEL_VAL=1,
                p_DQS_LO_DEL_ADJ="MINUS",
                p_DQS_LO_DEL_VAL=4,

                # Delay
                i_DYNDELAY0=0,
                i_DYNDELAY1=0,
                i_DYNDELAY2=0,
                i_DYNDELAY3=0,
                i_DYNDELAY4=0,
                i_DYNDELAY5=0,
                i_DYNDELAY6=0,
                i_DYNDELAY7=0,

                # Clocks / Reset
                i_SCLK=ClockSignal("sync"),
                i_ECLK=ClockSignal("sync2x"),
                i_RST=ResetSignal("dramsync"),
                i_DDRDEL=init.delay,
                i_PAUSE=init.pause | dqsbufm_manager.pause,

                # Control
                # Assert LOADNs to use DDRDEL control
                i_RDLOADN=0,
                i_RDMOVE=0,
                i_RDDIRECTION=1,
                i_WRLOADN=0,
                i_WRMOVE=0,
                i_WRDIRECTION=1,

                # Reads (generate shifted DQS clock for reads)
                i_READ0=dqs_re,
                i_READ1=dqs_re,
                i_READCLKSEL0=dqsbufm_manager.readclksel[0],
                i_READCLKSEL1=dqsbufm_manager.readclksel[1],
                i_READCLKSEL2=dqsbufm_manager.readclksel[2],
                i_DQSI=dqs_i,
                o_DQSR90=dqsr90,
                o_RDPNTR0=rdpntr[0],
                o_RDPNTR1=rdpntr[1],
                o_RDPNTR2=rdpntr[2],
                o_WRPNTR0=wrpntr[0],
                o_WRPNTR1=wrpntr[1],
                o_WRPNTR2=wrpntr[2],
                o_BURSTDET=burstdet,
                o_DATAVALID=datavalid,

                # Writes (generate shifted ECLK clock for writes)
                o_DQSW270=dqsw270,
                o_DQSW=dqsw)

            # Sticky per-lane burst-detect flag, readable through the CSR.
            with m.If(burstdet):
                m.d.sync += burstdet_reg[i].eq(1)

            # DQS and DM ---------------------------------------------------------------------------
            # Gather this lane's 8 mask bits of a BL8 burst: 4 from each DFI phase.
            dm_o_data = Signal(8)
            dm_o_data_d = Signal(8)
            dm_o_data_muxed = Signal(4)
            m.d.comb += dm_o_data.eq(Cat(
                dfi.phases[0].wrdata_mask[0*databits//8+i],
                dfi.phases[0].wrdata_mask[1*databits//8+i],
                dfi.phases[0].wrdata_mask[2*databits//8+i],
                dfi.phases[0].wrdata_mask[3*databits//8+i],

                dfi.phases[1].wrdata_mask[0*databits//8+i],
                dfi.phases[1].wrdata_mask[1*databits//8+i],
                dfi.phases[1].wrdata_mask[2*databits//8+i],
                dfi.phases[1].wrdata_mask[3*databits//8+i]),
            )
            m.d.sync += dm_o_data_d.eq(dm_o_data)

            # First half of the burst comes from the live value, second half
            # from the copy registered one cycle earlier.
            with m.If(bl8_chunk):
                m.d.sync += dm_o_data_muxed.eq(dm_o_data_d[4:])
            with m.Else():
                m.d.sync += dm_o_data_muxed.eq(dm_o_data[:4])

            # NOTE(review): SCLK here is the "dramsync" domain while the DQ/DQS
            # primitives below use the default domain - confirm this is intended.
            m.submodules += Instance("ODDRX2DQA",
                i_RST=ResetSignal("dramsync"),
                i_ECLK=ClockSignal("sync2x"),
                i_SCLK=ClockSignal("dramsync"),
                i_DQSW270=dqsw270,
                i_D0=dm_o_data_muxed[0],
                i_D1=dm_o_data_muxed[1],
                i_D2=dm_o_data_muxed[2],
                i_D3=dm_o_data_muxed[3],
                o_Q=self.pads.dm.o[i])

            dqs = Signal()
            dqs_oe_n = Signal()
            m.submodules += [
                Instance("ODDRX2DQSB",
                    i_RST=ResetSignal("dramsync"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_D0=0,
                    i_D1=1,
                    i_D2=0,
                    i_D3=1,
                    o_Q=dqs),
                Instance("TSHX2DQSA",
                    i_RST=ResetSignal("dramsync"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_T0=~(dqs_oe | dqs_postamble),
                    i_T1=~(dqs_oe | dqs_preamble),
                    o_Q=dqs_oe_n),
                Instance("BB",
                    i_I=dqs,
                    i_T=dqs_oe_n,
                    o_O=dqs_i,
                    io_B=self.pads.dqs.p[i]),
            ]

            for j in range(8*i, 8*(i+1)):
                dq_o = Signal()
                dq_i = Signal()
                dq_oe_n = Signal()
                dq_i_delayed = Signal()
                dq_i_data = Signal(4)
                dq_o_data = Signal(8)
                dq_o_data_d = Signal(8)
                dq_o_data_muxed = Signal(4)
                m.d.comb += dq_o_data.eq(Cat(
                    dfi.phases[0].wrdata[0*databits+j],
                    dfi.phases[0].wrdata[1*databits+j],
                    dfi.phases[0].wrdata[2*databits+j],
                    dfi.phases[0].wrdata[3*databits+j],
                    dfi.phases[1].wrdata[0*databits+j],
                    dfi.phases[1].wrdata[1*databits+j],
                    dfi.phases[1].wrdata[2*databits+j],
                    dfi.phases[1].wrdata[3*databits+j])
                )

                m.d.sync += dq_o_data_d.eq(dq_o_data)
                with m.If(bl8_chunk):
                    m.d.sync += dq_o_data_muxed.eq(dq_o_data_d[4:])
                with m.Else():
                    m.d.sync += dq_o_data_muxed.eq(dq_o_data[:4])

                m.submodules += [
                    Instance("ODDRX2DQA",
                        i_RST=ResetSignal("dramsync"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_D0=dq_o_data_muxed[0],
                        i_D1=dq_o_data_muxed[1],
                        i_D2=dq_o_data_muxed[2],
                        i_D3=dq_o_data_muxed[3],
                        o_Q=dq_o),
                    Instance("DELAYF",
                        p_DEL_MODE="DQS_ALIGNED_X2",
                        i_LOADN=1,
                        i_MOVE=0,
                        i_DIRECTION=0,
                        i_A=dq_i,
                        o_Z=dq_i_delayed),
                    Instance("IDDRX2DQA",
                        i_RST=ResetSignal("dramsync"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSR90=dqsr90,
                        i_RDPNTR0=rdpntr[0],
                        i_RDPNTR1=rdpntr[1],
                        i_RDPNTR2=rdpntr[2],
                        i_WRPNTR0=wrpntr[0],
                        i_WRPNTR1=wrpntr[1],
                        i_WRPNTR2=wrpntr[2],
                        i_D=dq_i_delayed,
                        o_Q0=dq_i_data[0],
                        o_Q1=dq_i_data[1],
                        o_Q2=dq_i_data[2],
                        o_Q3=dq_i_data[3]),
                    Instance("TSHX2DQA",
                        i_RST=ResetSignal("dramsync"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_T0=~dq_oe,
                        i_T1=~dq_oe,
                        o_Q=dq_oe_n),
                    Instance("BB",
                        i_I=dq_o,
                        i_T=dq_oe_n,
                        o_O=dq_i,
                        io_B=self.pads.dq.io[j])
                ]
                # The first cycle of DATAVALID fills DFI phase 0, any further
                # cycle with DATAVALID still high fills DFI phase 1.
                with m.If(~datavalid_prev & datavalid):
                    m.d.sync += [
                        dfi.phases[0].rddata[0*databits+j].eq(dq_i_data[0]),
                        dfi.phases[0].rddata[1*databits+j].eq(dq_i_data[1]),
                        dfi.phases[0].rddata[2*databits+j].eq(dq_i_data[2]),
                        dfi.phases[0].rddata[3*databits+j].eq(dq_i_data[3]),
                    ]
                with m.Elif(datavalid):
                    m.d.sync += [
                        dfi.phases[1].rddata[0*databits+j].eq(dq_i_data[0]),
                        dfi.phases[1].rddata[1*databits+j].eq(dq_i_data[1]),
                        dfi.phases[1].rddata[2*databits+j].eq(dq_i_data[2]),
                        dfi.phases[1].rddata[3*databits+j].eq(dq_i_data[3]),
                    ]

        # Read Control Path ------------------------------------------------------------------------
        # Creates a shift register of read commands coming from the DFI interface. This shift register
        # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
        # DFI interface that the read data is valid.
        #
        # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
        # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
        #
        # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
        # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
        rddata_en = Signal(self.settings.read_latency)
        rddata_en_last = Signal.like(rddata_en)
        m.d.comb += rddata_en.eq(Cat(dfi.phases[self.settings.rdphase].rddata_en, rddata_en_last))
        m.d.sync += rddata_en_last.eq(rddata_en)
        m.d.comb += dqs_re.eq(rddata_en[cl_sys_latency + 0] | rddata_en[cl_sys_latency + 1] | rddata_en[cl_sys_latency + 2])

        # NOTE: datavalid/datavalid_prev below are the signals of the last byte
        # lane created by the loop above; its falling edge marks the read data
        # as valid for every phase.
        rddata_valid = Signal()
        m.d.sync += rddata_valid.eq(datavalid_prev & ~datavalid)
        for phase in dfi.phases:
            m.d.comb += phase.rddata_valid.eq(rddata_valid)

        # Write Control Path -----------------------------------------------------------------------
        # Creates a shift register of write commands coming from the DFI interface. This shift register
        # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
        # interface: The PHY is operating in halfrate mode (so provide 4 datas every sys_clk cycles:
        # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 datas (BL8) for best efficiency.
        # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
        # FIXME: understand +2
        # NOTE(review): the FIXME above mentions +2 but the shift register is
        # cwl_sys_latency + 4 bits wide (taps +0..+3 are used below) - confirm.
        wrdata_en = Signal(cwl_sys_latency + 4)
        wrdata_en_last = Signal.like(wrdata_en)
        m.d.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
        m.d.sync += wrdata_en_last.eq(wrdata_en)
        m.d.comb += dq_oe.eq(wrdata_en[cwl_sys_latency + 1] | wrdata_en[cwl_sys_latency + 2])
        m.d.comb += bl8_chunk.eq(wrdata_en[cwl_sys_latency + 1])
        m.d.comb += dqs_oe.eq(dq_oe)

        # Write DQS Postamble/Preamble Control Path ------------------------------------------------
        # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
        # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
        # 1 for Preamble, 2 for the Write and 1 for the Postamble.
        m.d.comb += dqs_preamble.eq(wrdata_en[cwl_sys_latency + 0] & ~wrdata_en[cwl_sys_latency + 1])
        m.d.comb += dqs_postamble.eq(wrdata_en[cwl_sys_latency + 3] & ~wrdata_en[cwl_sys_latency + 2])

        return m