672d8b2c56ebe215dcc836d76cd681f80ce7060d
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequested"
106 with m
.State("RdlyUpdateRequested"):
107 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
108 m
.next
= "ResetPause"
110 with m
.State("ResetPause"):
111 m
.d
.sync
+= self
.pause
.eq(0)
117 class ECP5DDRPHY(Peripheral
, Elaboratable
):
118 def __init__(self
, pads
, sys_clk_freq
=100e6
):
119 super().__init
__(name
="phy")
122 self
._sys
_clk
_freq
= sys_clk_freq
124 databits
= len(self
.pads
.dq
.io
)
125 if databits
% 8 != 0:
126 raise ValueError("DQ pads should come in a multiple of 8")
129 bank
= self
.csr_bank()
131 self
.burstdet
= bank
.csr(databits
//8, "rw")
134 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
135 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
137 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
138 self
.bus
= self
._bridge
.bus
140 addressbits
= len(self
.pads
.a
.o0
)
141 bankbits
= len(self
.pads
.ba
.o0
)
143 if hasattr(self
.pads
, "cs") and hasattr(self
.pads
.cs
, "o0"):
144 nranks
= len(self
.pads
.cs
.o0
)
145 databits
= len(self
.pads
.dq
.io
)
146 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
148 # PHY settings -----------------------------------------------------------------------------
149 tck
= 1/(2*self
._sys
_clk
_freq
)
151 databits
= len(self
.pads
.dq
.io
)
152 cl
, cwl
= get_cl_cw("DDR3", tck
)
153 cl_sys_latency
= get_sys_latency(nphases
, cl
)
154 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
155 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
156 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
157 self
.settings
= PhySettings(
158 phytype
="ECP5DDRPHY",
161 dfi_databits
=4*databits
,
166 rdcmdphase
=rdcmdphase
,
167 wrcmdphase
=wrcmdphase
,
170 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
171 write_latency
=cwl_sys_latency
174 def elaborate(self
, platform
):
177 m
.submodules
.bridge
= self
._bridge
179 tck
= 1/(2*self
._sys
_clk
_freq
)
181 databits
= len(self
.pads
.dq
.io
)
183 burstdet_reg
= Signal(databits
//8, reset_less
=True)
184 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
187 with m
.If(self
.burstdet
.w_stb
):
188 m
.d
.sync
+= burstdet_reg
.eq(0)
190 # Init -------------------------------------------------------------------------------------
191 m
.submodules
.init
= init
= ECP5DDRPHYInit()
193 # Parameters -------------------------------------------------------------------------------
194 cl
, cwl
= get_cl_cw("DDR3", tck
)
195 cl_sys_latency
= get_sys_latency(nphases
, cl
)
196 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
198 # DFI Interface ----------------------------------------------------------------------------
203 # Clock --------------------------------------------------------------------------------
205 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
206 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
208 for i
in range(len(self
.pads
.clk
.o0
)):
210 self
.pads
.clk
.o0
[i
].eq(0),
211 self
.pads
.clk
.o1
[i
].eq(1),
212 self
.pads
.clk
.o2
[i
].eq(0),
213 self
.pads
.clk
.o3
[i
].eq(1),
216 # Addresses and Commands ---------------------------------------------------------------
218 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
219 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
220 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
221 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
223 for i
in range(len(self
.pads
.a
.o0
)):
225 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
226 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
227 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
228 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
230 for i
in range(len(self
.pads
.ba
.o0
)):
232 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
233 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
234 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
235 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
238 # Control pins: all of these have to be declared "xdr 4" when
239 # requesting the resource:
240 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
241 controls
= ["ras", "cas", "we", "clk_en", "odt"]
242 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
243 controls
.append("rst")
244 if hasattr(self
.pads
, "reset_n"):
245 controls
.append("reset_n")
246 if hasattr(self
.pads
, "cs"):
247 controls
.append("cs")
248 for name
in controls
:
249 print ("clock", name
, getattr(self
.pads
, name
))
250 pad
= getattr(self
.pads
, name
)
251 # sigh, convention in nmigen_boards is "rst" but in
252 # dfi.Interface it is "reset"
256 pad
.o_clk
.eq(ClockSignal("dramsync")),
257 pad
.o_fclk
.eq(ClockSignal("sync2x")),
259 for i
in range(len(pad
.o0
)):
261 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
262 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
263 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
264 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
267 # DQ ---------------------------------------------------------------------------------------
271 dqs_postamble
= Signal()
272 dqs_preamble
= Signal()
273 for i
in range(databits
//8):
283 datavalid_prev
= Signal()
284 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
286 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
287 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
289 m
.submodules
+= Instance("DQSBUFM",
290 p_DQS_LI_DEL_ADJ
="MINUS",
292 p_DQS_LO_DEL_ADJ
="MINUS",
306 i_SCLK
=ClockSignal("sync"),
307 i_ECLK
=ClockSignal("sync2x"),
308 i_RST
=ResetSignal("dramsync"),
310 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
313 # Assert LOADNs to use DDRDEL control
321 # Reads (generate shifted DQS clock for reads)
324 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
325 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
326 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
336 o_DATAVALID
=datavalid
,
338 # Writes (generate shifted ECLK clock for writes)
342 with m
.If(Rose(burstdet
)):
343 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
345 # DQS and DM ---------------------------------------------------------------------------
346 dm_o_data
= Signal(8)
347 dm_o_data_d
= Signal(8, reset_less
=True)
348 dm_o_data_muxed
= Signal(4, reset_less
=True)
349 m
.d
.comb
+= dm_o_data
.eq(Cat(
350 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
351 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
352 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
353 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
355 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
356 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
357 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
358 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
360 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
362 with m
.If(bl8_chunk
):
363 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
365 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
367 m
.submodules
+= Instance("ODDRX2DQA",
368 i_RST
=ResetSignal("dramsync"),
369 i_ECLK
=ClockSignal("sync2x"),
370 i_SCLK
=ClockSignal("dramsync"),
372 i_D0
=dm_o_data_muxed
[0],
373 i_D1
=dm_o_data_muxed
[1],
374 i_D2
=dm_o_data_muxed
[2],
375 i_D3
=dm_o_data_muxed
[3],
376 o_Q
=self
.pads
.dm
.o
[i
])
381 Instance("ODDRX2DQSB",
382 i_RST
=ResetSignal("dramsync"),
383 i_ECLK
=ClockSignal("sync2x"),
384 i_SCLK
=ClockSignal(),
391 Instance("TSHX2DQSA",
392 i_RST
=ResetSignal("dramsync"),
393 i_ECLK
=ClockSignal("sync2x"),
394 i_SCLK
=ClockSignal(),
396 i_T0
=~
(dqs_oe | dqs_postamble
),
397 i_T1
=~
(dqs_oe | dqs_preamble
),
403 io_B
=self
.pads
.dqs
.p
[i
]),
406 for j
in range(8*i
, 8*(i
+1)):
410 dq_i_delayed
= Signal()
411 dq_i_data
= Signal(4)
412 dq_o_data
= Signal(8)
413 dq_o_data_d
= Signal(8, reset_less
=True)
414 dq_o_data_muxed
= Signal(4, reset_less
=True)
415 m
.d
.comb
+= dq_o_data
.eq(Cat(
416 dfi
.phases
[0].wrdata
[0*databits
+j
],
417 dfi
.phases
[0].wrdata
[1*databits
+j
],
418 dfi
.phases
[0].wrdata
[2*databits
+j
],
419 dfi
.phases
[0].wrdata
[3*databits
+j
],
420 dfi
.phases
[1].wrdata
[0*databits
+j
],
421 dfi
.phases
[1].wrdata
[1*databits
+j
],
422 dfi
.phases
[1].wrdata
[2*databits
+j
],
423 dfi
.phases
[1].wrdata
[3*databits
+j
])
426 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
427 with m
.If(bl8_chunk
):
428 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
430 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
433 Instance("ODDRX2DQA",
434 i_RST
=ResetSignal("dramsync"),
435 i_ECLK
=ClockSignal("sync2x"),
436 i_SCLK
=ClockSignal(),
438 i_D0
=dq_o_data_muxed
[0],
439 i_D1
=dq_o_data_muxed
[1],
440 i_D2
=dq_o_data_muxed
[2],
441 i_D3
=dq_o_data_muxed
[3],
444 p_DEL_MODE
="DQS_ALIGNED_X2",
450 Instance("IDDRX2DQA",
451 i_RST
=ResetSignal("dramsync"),
452 i_ECLK
=ClockSignal("sync2x"),
453 i_SCLK
=ClockSignal(),
467 i_RST
=ResetSignal("dramsync"),
468 i_ECLK
=ClockSignal("sync2x"),
469 i_SCLK
=ClockSignal(),
478 io_B
=self
.pads
.dq
.io
[j
])
480 with m
.If(~datavalid_prev
& datavalid
):
482 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
483 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
484 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
485 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
487 with m
.Elif(datavalid
):
489 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
490 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
491 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
492 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
495 # Read Control Path ------------------------------------------------------------------------
496 # Creates a shift register of read commands coming from the DFI interface. This shift register
497 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
498 # DFI interface that the read data is valid.
500 # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
501 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
503 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
504 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
505 rddata_en
= Signal(self
.settings
.read_latency
)
506 rddata_en_last
= Signal
.like(rddata_en
)
507 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
508 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
509 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
511 rddata_valid
= Signal()
512 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
513 for phase
in dfi
.phases
:
514 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
516 # Write Control Path -----------------------------------------------------------------------
517 # Creates a shift register of write commands coming from the DFI interface. This shift register
518 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
519 # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
520 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
521 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
522 # FIXME: understand +2
523 wrdata_en
= Signal(cwl_sys_latency
+ 4)
524 wrdata_en_last
= Signal
.like(wrdata_en
)
525 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
526 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
527 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
528 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
529 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
531 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
532 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
533 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
534 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
535 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
536 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])