1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.lib
.cdc
import FFSynchronizer
13 from nmigen
.utils
import log2_int
15 from lambdasoc
.periph
import Peripheral
17 import gram
.stream
as stream
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
39 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
51 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
52 m
.d
.init
+= lock_d
.eq(lock
)
53 m
.d
.sync
+= new_lock
.eq(lock
& ~lock_d
)
55 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
58 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
59 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
60 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
61 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
62 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
63 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
64 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
65 (8*t
, [update
.eq(1)]), # Update DDRDLLA
66 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
67 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
71 m
.d
.comb
+= tl
.trigger
.eq(new_lock
)
73 m
.d
.comb
+= self
.delay
.eq(delay
)
78 class _DQSBUFMSettingManager(Elaboratable
):
79 """DQSBUFM setting manager.
81 The DQSBUFM primitive requires a very basic sequence when updating
82 read delay or other parameters. This elaboratable generates this
83 sequence from CSR events.
88 CSR storing the rdly value.
93 Pause signal for DQSBUFM.
94 readclksel : Signal(3), out
95 Readclksel signal for DQSBUFM.
97 def __init__(self
, rdly_csr
):
98 self
.rdly_csr
= rdly_csr
100 self
.pause
= Signal()
101 self
.readclksel
= Signal(3)
103 def elaborate(self
, platform
):
107 with m
.State("Idle"):
108 with m
.If(self
.rdly_csr
.w_stb
):
109 m
.d
.sync
+= self
.pause
.eq(1)
110 m
.next
= "RdlyUpdateRequested"
112 with m
.State("RdlyUpdateRequested"):
113 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
114 m
.next
= "ResetPause"
116 with m
.State("ResetPause"):
117 m
.d
.sync
+= self
.pause
.eq(0)
123 class ECP5DDRPHY(Peripheral
, Elaboratable
):
124 def __init__(self
, pads
, sys_clk_freq
=100e6
):
125 super().__init
__(name
="phy")
128 self
._sys
_clk
_freq
= sys_clk_freq
130 databits
= len(self
.pads
.dq
.io
)
131 if databits
% 8 != 0:
132 raise ValueError("DQ pads should come in a multiple of 8")
135 bank
= self
.csr_bank()
137 self
.burstdet
= bank
.csr(databits
//8, "rw")
140 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
141 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
143 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
144 self
.bus
= self
._bridge
.bus
146 addressbits
= len(self
.pads
.a
.o0
)
147 bankbits
= len(self
.pads
.ba
.o0
)
148 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
149 databits
= len(self
.pads
.dq
.io
)
150 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
152 # PHY settings -----------------------------------------------------------------------------
153 tck
= 1/(2*self
._sys
_clk
_freq
)
155 databits
= len(self
.pads
.dq
.io
)
156 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
157 cl
, cwl
= get_cl_cw("DDR3", tck
)
158 cl_sys_latency
= get_sys_latency(nphases
, cl
)
159 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
160 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
161 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
162 self
.settings
= PhySettings(
163 phytype
="ECP5DDRPHY",
166 dfi_databits
=4*databits
,
171 rdcmdphase
=rdcmdphase
,
172 wrcmdphase
=wrcmdphase
,
175 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
176 write_latency
=cwl_sys_latency
179 def elaborate(self
, platform
):
182 m
.submodules
.bridge
= self
._bridge
184 tck
= 1/(2*self
._sys
_clk
_freq
)
186 databits
= len(self
.pads
.dq
.io
)
188 burstdet_reg
= Signal(databits
//8, reset_less
=True)
189 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
192 with m
.If(self
.burstdet
.w_stb
):
193 m
.d
.sync
+= burstdet_reg
.eq(0)
195 # Init -------------------------------------------------------------------------------------
196 m
.submodules
.init
= init
= ECP5DDRPHYInit()
198 # Parameters -------------------------------------------------------------------------------
199 cl
, cwl
= get_cl_cw("DDR3", tck
)
200 cl_sys_latency
= get_sys_latency(nphases
, cl
)
201 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
203 # DFI Interface ----------------------------------------------------------------------------
208 # Clock --------------------------------------------------------------------------------
210 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
211 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
213 for i
in range(len(self
.pads
.clk
.o0
)):
215 self
.pads
.clk
.o0
[i
].eq(0),
216 self
.pads
.clk
.o1
[i
].eq(1),
217 self
.pads
.clk
.o2
[i
].eq(0),
218 self
.pads
.clk
.o3
[i
].eq(1),
221 # Addresses and Commands ---------------------------------------------------------------
223 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
224 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
225 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
226 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
228 for i
in range(len(self
.pads
.a
.o0
)):
230 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
231 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
232 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
233 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
235 for i
in range(len(self
.pads
.ba
.o0
)):
237 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
238 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
239 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
240 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
244 controls
= ["ras", "cas", "we", "clk_en", "odt"]
245 if hasattr(self
.pads
, "reset"):
246 controls
.append("reset")
247 if hasattr(self
.pads
, "cs"):
248 controls
.append("cs")
249 for name
in controls
:
251 getattr(self
.pads
, name
).o_clk
.eq(ClockSignal("dramsync")),
252 getattr(self
.pads
, name
).o_fclk
.eq(ClockSignal("sync2x")),
254 for i
in range(len(getattr(self
.pads
, name
).o0
)):
256 getattr(self
.pads
, name
).o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
257 getattr(self
.pads
, name
).o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
258 getattr(self
.pads
, name
).o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
259 getattr(self
.pads
, name
).o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
262 # DQ ---------------------------------------------------------------------------------------
266 dqs_postamble
= Signal()
267 dqs_preamble
= Signal()
268 for i
in range(databits
//8):
278 datavalid_prev
= Signal()
279 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
281 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
282 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
284 m
.submodules
+= Instance("DQSBUFM",
285 p_DQS_LI_DEL_ADJ
="MINUS",
287 p_DQS_LO_DEL_ADJ
="MINUS",
301 i_SCLK
=ClockSignal("sync"),
302 i_ECLK
=ClockSignal("sync2x"),
303 i_RST
=ResetSignal("dramsync"),
305 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
308 # Assert LOADNs to use DDRDEL control
316 # Reads (generate shifted DQS clock for reads)
319 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
320 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
321 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
331 o_DATAVALID
=datavalid
,
333 # Writes (generate shifted ECLK clock for writes)
338 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
340 # DQS and DM ---------------------------------------------------------------------------
341 dm_o_data
= Signal(8)
342 dm_o_data_d
= Signal(8)
343 dm_o_data_muxed
= Signal(4)
344 m
.d
.comb
+= dm_o_data
.eq(Cat(
345 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
346 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
347 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
348 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
350 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
351 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
352 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
353 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
355 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
357 with m
.If(bl8_chunk
):
358 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
360 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
362 m
.submodules
+= Instance("ODDRX2DQA",
363 i_RST
=ResetSignal("dramsync"),
364 i_ECLK
=ClockSignal("sync2x"),
365 i_SCLK
=ClockSignal("dramsync"),
367 i_D0
=dm_o_data_muxed
[0],
368 i_D1
=dm_o_data_muxed
[1],
369 i_D2
=dm_o_data_muxed
[2],
370 i_D3
=dm_o_data_muxed
[3],
371 o_Q
=self
.pads
.dm
.o
[i
])
376 Instance("ODDRX2DQSB",
377 i_RST
=ResetSignal("dramsync"),
378 i_ECLK
=ClockSignal("sync2x"),
379 i_SCLK
=ClockSignal(),
386 Instance("TSHX2DQSA",
387 i_RST
=ResetSignal("dramsync"),
388 i_ECLK
=ClockSignal("sync2x"),
389 i_SCLK
=ClockSignal(),
391 i_T0
=~
(dqs_oe | dqs_postamble
),
392 i_T1
=~
(dqs_oe | dqs_preamble
),
398 io_B
=self
.pads
.dqs
.p
[i
]),
401 for j
in range(8*i
, 8*(i
+1)):
405 dq_i_delayed
= Signal()
406 dq_i_data
= Signal(4)
407 dq_o_data
= Signal(8)
408 dq_o_data_d
= Signal(8)
409 dq_o_data_muxed
= Signal(4)
410 m
.d
.comb
+= dq_o_data
.eq(Cat(
411 dfi
.phases
[0].wrdata
[0*databits
+j
],
412 dfi
.phases
[0].wrdata
[1*databits
+j
],
413 dfi
.phases
[0].wrdata
[2*databits
+j
],
414 dfi
.phases
[0].wrdata
[3*databits
+j
],
415 dfi
.phases
[1].wrdata
[0*databits
+j
],
416 dfi
.phases
[1].wrdata
[1*databits
+j
],
417 dfi
.phases
[1].wrdata
[2*databits
+j
],
418 dfi
.phases
[1].wrdata
[3*databits
+j
])
421 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
422 with m
.If(bl8_chunk
):
423 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
425 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
428 Instance("ODDRX2DQA",
429 i_RST
=ResetSignal("dramsync"),
430 i_ECLK
=ClockSignal("sync2x"),
431 i_SCLK
=ClockSignal(),
433 i_D0
=dq_o_data_muxed
[0],
434 i_D1
=dq_o_data_muxed
[1],
435 i_D2
=dq_o_data_muxed
[2],
436 i_D3
=dq_o_data_muxed
[3],
439 p_DEL_MODE
="DQS_ALIGNED_X2",
445 Instance("IDDRX2DQA",
446 i_RST
=ResetSignal("dramsync"),
447 i_ECLK
=ClockSignal("sync2x"),
448 i_SCLK
=ClockSignal(),
462 i_RST
=ResetSignal("dramsync"),
463 i_ECLK
=ClockSignal("sync2x"),
464 i_SCLK
=ClockSignal(),
473 io_B
=self
.pads
.dq
.io
[j
])
475 with m
.If(~datavalid_prev
& datavalid
):
477 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
478 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
479 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
480 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
482 with m
.Elif(datavalid
):
484 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
485 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
486 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
487 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
490 # Read Control Path ------------------------------------------------------------------------
491 # Creates a shift register of read commands coming from the DFI interface. This shift register
492 # is used to control DQS read (internal read pulse of the DQSBUFM) and to indicate to the
493 # DFI interface that the read data is valid.
495 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
496 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
498 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
499 # interface; the latency is the sum of the ODDRX2DQA, CAS and IDDRX2DQA latencies.
500 rddata_en
= Signal(self
.settings
.read_latency
)
501 rddata_en_last
= Signal
.like(rddata_en
)
502 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
503 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
504 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 0] | rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
506 rddata_valid
= Signal()
507 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
508 for phase
in dfi
.phases
:
509 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
511 # Write Control Path -----------------------------------------------------------------------
512 # Creates a shift register of write commands coming from the DFI interface. This shift register
513 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
514 # interface: the PHY operates in half-rate mode (so it provides 4 data words every sys_clk cycle:
515 # 2x for DDR, 2x for half-rate), but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
516 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
517 # FIXME: understand +2
518 wrdata_en
= Signal(cwl_sys_latency
+ 4)
519 wrdata_en_last
= Signal
.like(wrdata_en
)
520 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
521 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
522 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
523 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
524 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
526 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
527 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
528 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
529 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
530 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
531 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])