1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequested"
106 with m
.State("RdlyUpdateRequested"):
107 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
108 m
.next
= "ResetPause"
110 with m
.State("ResetPause"):
111 m
.d
.sync
+= self
.pause
.eq(0)
117 class ECP5DDRPHY(Peripheral
, Elaboratable
):
118 def __init__(self
, pads
, sys_clk_freq
=100e6
):
119 super().__init
__(name
="phy")
122 self
._sys
_clk
_freq
= sys_clk_freq
124 databits
= len(self
.pads
.dq
.io
)
125 if databits
% 8 != 0:
126 raise ValueError("DQ pads should come in a multiple of 8")
129 bank
= self
.csr_bank()
131 self
.burstdet
= bank
.csr(databits
//8, "rw")
134 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
135 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
137 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
138 self
.bus
= self
._bridge
.bus
140 addressbits
= len(self
.pads
.a
.o0
)
141 bankbits
= len(self
.pads
.ba
.o0
)
143 if hasattr(self
.pads
, "cs_n") and hasattr(self
.pads
.cs_n
, "o0"):
144 nranks
= len(self
.pads
.cs_n
.o0
)
145 databits
= len(self
.pads
.dq
.io
)
146 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4,
149 # PHY settings -----------------------------------------------------------------------------
150 tck
= 1/(2*self
._sys
_clk
_freq
)
152 databits
= len(self
.pads
.dq
.io
)
153 cl
, cwl
= get_cl_cw("DDR3", tck
)
154 cl_sys_latency
= get_sys_latency(nphases
, cl
)
155 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
156 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
157 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
158 self
.settings
= PhySettings(
159 phytype
="ECP5DDRPHY",
162 dfi_databits
=4*databits
,
167 rdcmdphase
=rdcmdphase
,
168 wrcmdphase
=wrcmdphase
,
171 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
172 write_latency
=cwl_sys_latency
175 def elaborate(self
, platform
):
177 comb
, sync
= m
.d
.comb
, m
.d
.sync
179 m
.submodules
.bridge
= self
._bridge
181 tck
= 1/(2*self
._sys
_clk
_freq
)
183 databits
= len(self
.pads
.dq
.io
)
185 burstdet_reg
= Signal(databits
//8, reset_less
=True)
186 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
189 with m
.If(self
.burstdet
.w_stb
):
190 m
.d
.sync
+= burstdet_reg
.eq(0)
192 # Init -------------------------------------------------------------------------------------
193 m
.submodules
.init
= init
= ECP5DDRPHYInit()
195 # Parameters -------------------------------------------------------------------------------
196 cl
, cwl
= get_cl_cw("DDR3", tck
)
197 cl_sys_latency
= get_sys_latency(nphases
, cl
)
198 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
200 # DFI Interface ----------------------------------------------------------------------------
205 # Clock --------------------------------------------------------------------------------
207 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
208 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
210 for i
in range(len(self
.pads
.clk
.o0
)):
212 self
.pads
.clk
.o0
[i
].eq(0),
213 self
.pads
.clk
.o1
[i
].eq(1),
214 self
.pads
.clk
.o2
[i
].eq(0),
215 self
.pads
.clk
.o3
[i
].eq(1),
218 # Addresses and Commands ---------------------------------------------------------------
220 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
221 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
222 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
223 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
225 for i
in range(len(self
.pads
.a
.o0
)):
227 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
228 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
229 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
230 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
232 for i
in range(len(self
.pads
.ba
.o0
)):
234 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
235 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
236 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
237 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
240 # Control pins: all of these have to be declared "xdr 4" when
241 # requesting the resource:
242 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
243 controls
= ["ras", "cas", "we", "clk_en", "odt"]
244 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
245 controls
.append("rst")
246 if hasattr(self
.pads
, "reset_n"):
247 controls
.append("reset_n")
248 if hasattr(self
.pads
, "cs"):
249 controls
.append("cs")
250 for name
in controls
:
251 print ("clock", name
, getattr(self
.pads
, name
))
252 pad
= getattr(self
.pads
, name
)
253 # sigh, convention in nmigen_boards is "rst" but in
254 # dfi.Interface it is "reset_n"
255 dfi2pads
= {'rst': 'reset_n', 'cs': 'cs_n'}
256 name
= dfi2pads
.get(name
, name
) # remap if exists
258 pad
.o_clk
.eq(ClockSignal("dramsync")),
259 pad
.o_fclk
.eq(ClockSignal("sync2x")),
261 for i
in range(len(pad
.o0
)):
263 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
264 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
265 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
266 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
269 # DQ ---------------------------------------------------------------------------------------
273 dqs_postamble
= Signal()
274 dqs_preamble
= Signal()
275 for i
in range(databits
//8):
285 datavalid_prev
= Signal()
286 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
288 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
289 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
291 m
.submodules
+= Instance("DQSBUFM",
292 p_DQS_LI_DEL_ADJ
="MINUS",
294 p_DQS_LO_DEL_ADJ
="MINUS",
308 i_SCLK
=ClockSignal("sync"),
309 i_ECLK
=ClockSignal("sync2x"),
310 i_RST
=ResetSignal("dramsync"),
312 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
315 # Assert LOADNs to use DDRDEL control
323 # Reads (generate shifted DQS clock for reads)
326 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
327 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
328 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
338 o_DATAVALID
=datavalid
,
340 # Writes (generate shifted ECLK clock for writes)
344 with m
.If(Rose(burstdet
)):
345 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
347 # DQS and DM ---------------------------------------------------------------------------
348 dm_o_data
= Signal(8)
349 dm_o_data_d
= Signal(8, reset_less
=True)
350 dm_o_data_muxed
= Signal(4, reset_less
=True)
351 m
.d
.comb
+= dm_o_data
.eq(Cat(
352 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
353 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
354 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
355 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
357 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
358 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
359 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
360 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
362 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
364 with m
.If(bl8_chunk
):
365 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
367 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
369 m
.submodules
+= Instance("ODDRX2DQA",
370 i_RST
=ResetSignal("dramsync"),
371 i_ECLK
=ClockSignal("sync2x"),
372 i_SCLK
=ClockSignal("dramsync"),
374 i_D0
=dm_o_data_muxed
[0],
375 i_D1
=dm_o_data_muxed
[1],
376 i_D2
=dm_o_data_muxed
[2],
377 i_D3
=dm_o_data_muxed
[3],
378 o_Q
=self
.pads
.dm
.o
[i
])
383 Instance("ODDRX2DQSB",
384 i_RST
=ResetSignal("dramsync"),
385 i_ECLK
=ClockSignal("sync2x"),
386 i_SCLK
=ClockSignal(),
393 Instance("TSHX2DQSA",
394 i_RST
=ResetSignal("dramsync"),
395 i_ECLK
=ClockSignal("sync2x"),
396 i_SCLK
=ClockSignal(),
398 i_T0
=~
(dqs_oe | dqs_postamble
),
399 i_T1
=~
(dqs_oe | dqs_preamble
),
405 io_B
=self
.pads
.dqs
.p
[i
]),
408 for j
in range(8*i
, 8*(i
+1)):
409 dq_o
= Signal(name
="dq_o_%d" % j
)
410 dq_i
= Signal(name
="dq_i_%d" % j
)
411 dq_oe_n
= Signal(name
="dq_oe_n_%d" % j
)
412 dq_i_delayed
= Signal(name
="dq_i_delayed_%d" % j
)
413 dq_i_data
= Signal(4, name
="dq_i_data_%d" % j
)
414 dq_o_data
= Signal(8, name
="dq_o_data_%d" % j
)
415 dq_o_data_d
= Signal(8, reset_less
=True)
416 dq_o_data_muxed
= Signal(4, reset_less
=True)
417 m
.d
.comb
+= dq_o_data
.eq(Cat(
418 dfi
.phases
[0].wrdata
[0*databits
+j
],
419 dfi
.phases
[0].wrdata
[1*databits
+j
],
420 dfi
.phases
[0].wrdata
[2*databits
+j
],
421 dfi
.phases
[0].wrdata
[3*databits
+j
],
422 dfi
.phases
[1].wrdata
[0*databits
+j
],
423 dfi
.phases
[1].wrdata
[1*databits
+j
],
424 dfi
.phases
[1].wrdata
[2*databits
+j
],
425 dfi
.phases
[1].wrdata
[3*databits
+j
])
428 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
429 with m
.If(bl8_chunk
):
430 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
432 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
435 Instance("ODDRX2DQA",
436 i_RST
=ResetSignal("dramsync"),
437 i_ECLK
=ClockSignal("sync2x"),
438 i_SCLK
=ClockSignal(),
440 i_D0
=dq_o_data_muxed
[0],
441 i_D1
=dq_o_data_muxed
[1],
442 i_D2
=dq_o_data_muxed
[2],
443 i_D3
=dq_o_data_muxed
[3],
446 p_DEL_MODE
="DQS_ALIGNED_X2",
452 Instance("IDDRX2DQA",
453 i_RST
=ResetSignal("dramsync"),
454 i_ECLK
=ClockSignal("sync2x"),
455 i_SCLK
=ClockSignal(),
469 i_RST
=ResetSignal("dramsync"),
470 i_ECLK
=ClockSignal("sync2x"),
471 i_SCLK
=ClockSignal(),
480 io_B
=self
.pads
.dq
.io
[j
])
482 # shift-register delay on the incoming read data
483 dq_i_bs
= BitSlip(4, Const(0), Const(0), cycles
=1)
484 m
.submodules
['dq_i_bitslip_%d' % j
] = dq_i_bs
485 dq_i_bs_o
= Signal(4, name
="dq_i_bs_o_%d" % j
)
486 dq_i_bs_o_d
= Signal(4, name
="dq_i_bs_o_d_%d" % j
)
487 comb
+= dq_i_bs
.i
.eq(dq_i_data
)
488 comb
+= dq_i_bs_o
.eq(dq_i_bs
.o
)
489 sync
+= dq_i_bs_o_d
.eq(dq_i_bs_o
) # delay by 1 clock
490 #with m.If(~datavalid_prev & datavalid):
492 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_bs_o_d
[0]),
493 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_bs_o_d
[1]),
494 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_bs_o_d
[2]),
495 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_bs_o_d
[3]),
497 #with m.Elif(datavalid):
499 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_bs_o
[0]),
500 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_bs_o
[1]),
501 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_bs_o
[2]),
502 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_bs_o
[3]),
505 # Read Control Path ------------------------------------------------------------------------
506 # Creates a shift register of read commands coming from the DFI interface. This shift register
507 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
508 # DFI interface that the read data is valid.
510 # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
511 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
513 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
514 # interface; the latency is the sum of the ODDRX2DQA, CAS and IDDRX2DQA latencies.
515 rddata_en
= Signal(self
.settings
.read_latency
)
516 rddata_en_last
= Signal
.like(rddata_en
)
517 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
518 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
519 for phase
in dfi
.phases
:
520 m
.d
.sync
+= phase
.rddata_valid
.eq(rddata_en
[-1])
521 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
524 # Write Control Path -----------------------------------------------------------------------
525 # Creates a shift register of write commands coming from the DFI interface. This shift register
526 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
527 # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
528 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
529 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
530 # FIXME: understand +2
531 wrdata_en
= Signal(cwl_sys_latency
+ 4)
532 wrdata_en_last
= Signal
.like(wrdata_en
)
533 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
534 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
535 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
536 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
537 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
539 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
540 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
541 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
542 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
543 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
544 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])