b31e19e9cdb6b8d5c8130dc25586285ecf566934
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequested"
106 with m
.State("RdlyUpdateRequested"):
107 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
108 m
.next
= "ResetPause"
110 with m
.State("ResetPause"):
111 m
.d
.sync
+= self
.pause
.eq(0)
117 class ECP5DDRPHY(Peripheral
, Elaboratable
):
118 def __init__(self
, pads
, sys_clk_freq
=100e6
):
119 super().__init
__(name
="phy")
122 self
._sys
_clk
_freq
= sys_clk_freq
124 databits
= len(self
.pads
.dq
.io
)
125 if databits
% 8 != 0:
126 raise ValueError("DQ pads should come in a multiple of 8")
129 bank
= self
.csr_bank()
131 self
.burstdet
= bank
.csr(databits
//8, "rw")
134 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
135 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
136 self
.bitslip
= bank
.csr(3, "rw") # phase-delay on read
138 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
139 self
.bus
= self
._bridge
.bus
141 addressbits
= len(self
.pads
.a
.o0
)
142 bankbits
= len(self
.pads
.ba
.o0
)
144 if hasattr(self
.pads
, "cs_n") and hasattr(self
.pads
.cs_n
, "o0"):
145 nranks
= len(self
.pads
.cs_n
.o0
)
146 databits
= len(self
.pads
.dq
.io
)
147 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4,
150 # PHY settings -----------------------------------------------------------------------------
151 tck
= 1/(2*self
._sys
_clk
_freq
)
153 databits
= len(self
.pads
.dq
.io
)
154 cl
, cwl
= get_cl_cw("DDR3", tck
)
155 cl_sys_latency
= get_sys_latency(nphases
, cl
)
156 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
157 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
158 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
159 self
.settings
= PhySettings(
160 phytype
="ECP5DDRPHY",
163 dfi_databits
=4*databits
,
168 rdcmdphase
=rdcmdphase
,
169 wrcmdphase
=wrcmdphase
,
172 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
173 write_latency
=cwl_sys_latency
176 def elaborate(self
, platform
):
178 comb
, sync
= m
.d
.comb
, m
.d
.sync
180 m
.submodules
.bridge
= self
._bridge
182 tck
= 1/(2*self
._sys
_clk
_freq
)
184 databits
= len(self
.pads
.dq
.io
)
186 burstdet_reg
= Signal(databits
//8, reset_less
=True)
187 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
190 with m
.If(self
.burstdet
.w_stb
):
191 m
.d
.sync
+= burstdet_reg
.eq(0)
193 # Init -------------------------------------------------------------------------------------
194 m
.submodules
.init
= init
= ECP5DDRPHYInit()
196 # Parameters -------------------------------------------------------------------------------
197 cl
, cwl
= get_cl_cw("DDR3", tck
)
198 cl_sys_latency
= get_sys_latency(nphases
, cl
)
199 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
201 # DFI Interface ----------------------------------------------------------------------------
206 # Clock --------------------------------------------------------------------------------
208 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
209 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
211 for i
in range(len(self
.pads
.clk
.o0
)):
213 self
.pads
.clk
.o0
[i
].eq(0),
214 self
.pads
.clk
.o1
[i
].eq(1),
215 self
.pads
.clk
.o2
[i
].eq(0),
216 self
.pads
.clk
.o3
[i
].eq(1),
219 # Addresses and Commands ---------------------------------------------------------------
221 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
222 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
223 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
224 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
226 for i
in range(len(self
.pads
.a
.o0
)):
228 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
229 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
230 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
231 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
233 for i
in range(len(self
.pads
.ba
.o0
)):
235 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
236 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
237 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
238 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
241 # Control pins: all of these have to be declared "xdr 4" when
242 # requesting the resource:
243 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
244 controls
= ["ras", "cas", "we", "clk_en", "odt"]
245 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
246 controls
.append("rst")
247 if hasattr(self
.pads
, "cs"):
248 controls
.append("cs")
249 for name
in controls
:
250 print ("clock", name
, getattr(self
.pads
, name
))
251 pad
= getattr(self
.pads
, name
)
252 # sigh, convention in nmigen_boards is "rst" but in
253 # dfi.Interface it is "reset"
254 dfi2pads
= {'rst': 'reset', 'cs': 'cs_n'}
255 name
= dfi2pads
.get(name
, name
) # remap if exists
257 pad
.o_clk
.eq(ClockSignal("dramsync")),
258 pad
.o_fclk
.eq(ClockSignal("sync2x")),
260 for i
in range(len(pad
.o0
)):
262 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
263 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
264 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
265 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
268 # DQ ---------------------------------------------------------------------------------------
272 dqs_postamble
= Signal()
273 dqs_preamble
= Signal()
274 for i
in range(databits
//8):
284 datavalid_prev
= Signal()
285 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
287 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
288 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
290 m
.submodules
+= Instance("DQSBUFM",
291 p_DQS_LI_DEL_ADJ
="MINUS",
293 p_DQS_LO_DEL_ADJ
="MINUS",
307 i_SCLK
=ClockSignal("sync"),
308 i_ECLK
=ClockSignal("sync2x"),
309 i_RST
=ResetSignal("dramsync"),
311 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
314 # Assert LOADNs to use DDRDEL control
322 # Reads (generate shifted DQS clock for reads)
325 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
326 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
327 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
337 o_DATAVALID
=datavalid
,
339 # Writes (generate shifted ECLK clock for writes)
343 with m
.If(Rose(burstdet
)):
344 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
346 # DQS and DM ---------------------------------------------------------------------------
347 dm_o_data
= Signal(8)
348 dm_o_data_d
= Signal(8, reset_less
=True)
349 dm_o_data_muxed
= Signal(4, reset_less
=True)
350 m
.d
.comb
+= dm_o_data
.eq(Cat(
351 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
352 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
353 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
354 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
356 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
357 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
358 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
359 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
361 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
363 with m
.If(bl8_chunk
):
364 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
366 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
368 m
.submodules
+= Instance("ODDRX2DQA",
369 i_RST
=ResetSignal("dramsync"),
370 i_ECLK
=ClockSignal("sync2x"),
371 i_SCLK
=ClockSignal("dramsync"),
373 i_D0
=dm_o_data_muxed
[0],
374 i_D1
=dm_o_data_muxed
[1],
375 i_D2
=dm_o_data_muxed
[2],
376 i_D3
=dm_o_data_muxed
[3],
377 o_Q
=self
.pads
.dm
.o
[i
])
382 Instance("ODDRX2DQSB",
383 i_RST
=ResetSignal("dramsync"),
384 i_ECLK
=ClockSignal("sync2x"),
385 i_SCLK
=ClockSignal(),
392 Instance("TSHX2DQSA",
393 i_RST
=ResetSignal("dramsync"),
394 i_ECLK
=ClockSignal("sync2x"),
395 i_SCLK
=ClockSignal(),
397 i_T0
=~
(dqs_oe | dqs_postamble
),
398 i_T1
=~
(dqs_oe | dqs_preamble
),
404 io_B
=self
.pads
.dqs
.p
[i
]),
407 for j
in range(8*i
, 8*(i
+1)):
408 dq_o
= Signal(name
="dq_o_%d" % j
)
409 dq_i
= Signal(name
="dq_i_%d" % j
)
410 dq_oe_n
= Signal(name
="dq_oe_n_%d" % j
)
411 dq_i_delayed
= Signal(name
="dq_i_delayed_%d" % j
)
412 dq_i_data
= Signal(4, name
="dq_i_data_%d" % j
)
413 dq_o_data
= Signal(8, name
="dq_o_data_%d" % j
)
414 dq_o_data_d
= Signal(8, reset_less
=True)
415 dq_o_data_muxed
= Signal(4, reset_less
=True)
416 m
.d
.comb
+= dq_o_data
.eq(Cat(
417 dfi
.phases
[0].wrdata
[0*databits
+j
],
418 dfi
.phases
[0].wrdata
[1*databits
+j
],
419 dfi
.phases
[0].wrdata
[2*databits
+j
],
420 dfi
.phases
[0].wrdata
[3*databits
+j
],
421 dfi
.phases
[1].wrdata
[0*databits
+j
],
422 dfi
.phases
[1].wrdata
[1*databits
+j
],
423 dfi
.phases
[1].wrdata
[2*databits
+j
],
424 dfi
.phases
[1].wrdata
[3*databits
+j
])
427 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
428 with m
.If(bl8_chunk
):
429 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
431 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
434 Instance("ODDRX2DQA",
435 i_RST
=ResetSignal("dramsync"),
436 i_ECLK
=ClockSignal("sync2x"),
437 i_SCLK
=ClockSignal(),
439 i_D0
=dq_o_data_muxed
[0],
440 i_D1
=dq_o_data_muxed
[1],
441 i_D2
=dq_o_data_muxed
[2],
442 i_D3
=dq_o_data_muxed
[3],
445 p_DEL_MODE
="DQS_ALIGNED_X2",
451 Instance("IDDRX2DQA",
452 i_RST
=ResetSignal("dramsync"),
453 i_ECLK
=ClockSignal("sync2x"),
454 i_SCLK
=ClockSignal(),
468 i_RST
=ResetSignal("dramsync"),
469 i_ECLK
=ClockSignal("sync2x"),
470 i_SCLK
=ClockSignal(),
479 io_B
=self
.pads
.dq
.io
[j
])
481 # shift-register delay on the incoming read data
482 dq_i_bs
= BitSlip(4, Const(0), cycles
=1)
483 m
.submodules
['dq_i_bitslip_%d' % j
] = dq_i_bs
484 dq_i_bs_o
= Signal(4, name
="dq_i_bs_o_%d" % j
)
485 dq_i_bs_o_d
= Signal(4, name
="dq_i_bs_o_d_%d" % j
)
486 comb
+= dq_i_bs
.i
.eq(dq_i_data
)
487 comb
+= dq_i_bs_o
.eq(dq_i_bs
.o
)
488 sync
+= dq_i_bs_o_d
.eq(dq_i_bs_o
) # delay by 1 clock
489 #with m.If(~datavalid_prev & datavalid):
491 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_bs_o_d
[0]),
492 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_bs_o_d
[1]),
493 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_bs_o_d
[2]),
494 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_bs_o_d
[3]),
496 #with m.Elif(datavalid):
498 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_bs_o
[0]),
499 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_bs_o
[1]),
500 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_bs_o
[2]),
501 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_bs_o
[3]),
504 # Read Control Path ------------------------------------------------------------------------
505 # Creates a shift register of read commands coming from the DFI interface. This shift register
506 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
507 # DFI interface that the read data is valid.
509 # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
510 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
512 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
513 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
514 rddata_en
= Signal(self
.settings
.read_latency
)
515 rddata_en_last
= Signal
.like(rddata_en
)
516 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
517 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
518 for phase
in dfi
.phases
:
519 m
.d
.sync
+= phase
.rddata_valid
.eq(rddata_en
[-1])
520 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
523 # Write Control Path -----------------------------------------------------------------------
524 # Creates a shift register of write commands coming from the DFI interface. This shift register
525 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
526 # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
527 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
528 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
529 # FIXME: understand +2
530 wrdata_en
= Signal(cwl_sys_latency
+ 4)
531 wrdata_en_last
= Signal
.like(wrdata_en
)
532 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
533 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
534 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
535 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
536 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
538 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
539 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
540 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
541 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
542 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
543 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])