1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [ freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [ self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [ self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [ freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [ update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [ update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequested"
106 with m
.State("RdlyUpdateRequested"):
107 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
108 m
.next
= "ResetPause"
110 with m
.State("ResetPause"):
111 m
.d
.sync
+= self
.pause
.eq(0)
117 class ECP5DDRPHY(Peripheral
, Elaboratable
):
118 def __init__(self
, pads
, sys_clk_freq
=100e6
):
119 super().__init
__(name
="phy")
122 self
._sys
_clk
_freq
= sys_clk_freq
123 self
.init
= ECP5DDRPHYInit()
125 databits
= len(self
.pads
.dq
.io
)
126 if databits
% 8 != 0:
127 raise ValueError("DQ pads should come in a multiple of 8")
130 bank
= self
.csr_bank()
132 self
.burstdet
= bank
.csr(databits
//8, "rw")
135 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
136 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
137 self
.bitslip
= bank
.csr(3, "rw") # phase-delay on read
139 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
140 self
.bus
= self
._bridge
.bus
142 addressbits
= len(self
.pads
.a
.o0
)
143 bankbits
= len(self
.pads
.ba
.o0
)
145 if hasattr(self
.pads
, "cs_n") and hasattr(self
.pads
.cs_n
, "o0"):
146 nranks
= len(self
.pads
.cs_n
.o0
)
147 databits
= len(self
.pads
.dq
.io
)
148 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4,
151 # PHY settings -----------------------------------------------------------------------------
152 tck
= 1/(2*self
._sys
_clk
_freq
)
154 databits
= len(self
.pads
.dq
.io
)
155 cl
, cwl
= get_cl_cw("DDR3", tck
)
156 cl_sys_latency
= get_sys_latency(nphases
, cl
)
157 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
158 rdphase
= get_sys_phase(nphases
, cl_sys_latency
, cl
)
159 wrphase
= get_sys_phase(nphases
, cwl_sys_latency
, cwl
)
160 self
.settings
= PhySettings(
161 phytype
="ECP5DDRPHY",
164 dfi_databits
=4*databits
,
169 rdcmdphase
= (rdphase
- 1)%nphases
,
170 wrcmdphase
= (wrphase
- 1)%nphases
,
173 read_latency
= cl_sys_latency
+ 10,
174 write_latency
=cwl_sys_latency
177 def elaborate(self
, platform
):
180 m
.submodules
.bridge
= self
._bridge
182 tck
= 1/(2*self
._sys
_clk
_freq
)
184 databits
= len(self
.pads
.dq
.io
)
186 burstdet_reg
= Signal(databits
//8, reset_less
=True)
187 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
190 with m
.If(self
.burstdet
.w_stb
):
191 m
.d
.sync
+= burstdet_reg
.eq(0)
193 # Init -------------------------------------------------------------------------------------
194 m
.submodules
.init
= init
= self
.init
196 # Parameters -------------------------------------------------------------------------------
197 cl
, cwl
= get_cl_cw("DDR3", tck
)
198 cl_sys_latency
= get_sys_latency(nphases
, cl
)
199 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
201 # DFI Interface ----------------------------------------------------------------------------
206 # Clock --------------------------------------------------------------------------------
208 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
209 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
211 for i
in range(len(self
.pads
.clk
.o0
)):
213 self
.pads
.clk
.o0
[i
].eq(0),
214 self
.pads
.clk
.o1
[i
].eq(1),
215 self
.pads
.clk
.o2
[i
].eq(0),
216 self
.pads
.clk
.o3
[i
].eq(1),
219 # Addresses and Commands ---------------------------------------------------------------
221 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
222 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
223 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
224 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
226 for i
in range(len(self
.pads
.a
.o0
)):
228 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
229 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
230 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
231 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
233 for i
in range(len(self
.pads
.ba
.o0
)):
235 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
236 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
237 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
238 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
241 # Control pins: all of these have to be declared "xdr 4" when
242 # requesting the resource:
243 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
244 controls
= ["ras", "cas", "we", "clk_en", "odt"]
245 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
246 controls
.append("rst")
247 if hasattr(self
.pads
, "cs"):
248 controls
.append("cs")
249 for name
in controls
:
250 print ("clock", name
, getattr(self
.pads
, name
))
251 pad
= getattr(self
.pads
, name
)
252 # sigh, convention in nmigen_boards is "rst" but in
253 # dfi.Interface it is "reset"
254 dfi2pads
= {'rst': 'reset', 'cs': 'cs_n'}
255 name
= dfi2pads
.get(name
, name
) # remap if exists
258 pad
.o_clk
.eq(ClockSignal("sync")),
262 pad
.o_clk
.eq(ClockSignal("dramsync")),
263 pad
.o_fclk
.eq(ClockSignal("sync2x")),
266 for i
in range(len(pad
.o
)):
268 pad
.o
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
271 # cs_n can't be directly connected to cs without being inverted first...
272 for i
in range(len(pad
.o0
)):
274 pad
.o0
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
275 pad
.o1
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
276 pad
.o2
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
277 pad
.o3
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
280 for i
in range(len(pad
.o0
)):
282 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
283 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
284 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
285 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
288 # DQ ---------------------------------------------------------------------------------------
292 dqs_postamble
= Signal()
293 dqs_preamble
= Signal()
294 for i
in range(databits
//8):
304 datavalid_prev
= Signal()
305 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
307 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
308 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
310 m
.submodules
+= Instance("DQSBUFM",
311 p_DQS_LI_DEL_ADJ
="MINUS",
313 p_DQS_LO_DEL_ADJ
="MINUS",
327 i_SCLK
=ClockSignal("sync"),
328 i_ECLK
=ClockSignal("sync2x"),
329 i_RST
=ResetSignal("dramsync"),
331 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
334 # Assert LOADNs to use DDRDEL control
342 # Reads (generate shifted DQS clock for reads)
345 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
346 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
347 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
357 o_DATAVALID
=datavalid
,
359 # Writes (generate shifted ECLK clock for writes)
363 with m
.If(Rose(burstdet
)):
364 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
366 # DQS and DM ---------------------------------------------------------------------------
367 dm_o_data
= Signal(8)
368 dm_o_data_d
= Signal(8, reset_less
=True)
369 dm_o_data_muxed
= Signal(4, reset_less
=True)
370 m
.d
.comb
+= dm_o_data
.eq(Cat(
371 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
372 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
373 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
374 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
376 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
377 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
378 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
379 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
381 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
383 with m
.If(bl8_chunk
):
384 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
386 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
388 m
.submodules
+= Instance("ODDRX2DQA",
389 i_RST
=ResetSignal("dramsync"),
390 i_ECLK
=ClockSignal("sync2x"),
391 i_SCLK
=ClockSignal("dramsync"),
393 i_D0
=dm_o_data_muxed
[0],
394 i_D1
=dm_o_data_muxed
[1],
395 i_D2
=dm_o_data_muxed
[2],
396 i_D3
=dm_o_data_muxed
[3],
397 o_Q
=self
.pads
.dm
.o
[i
])
402 Instance("ODDRX2DQSB",
403 i_RST
=ResetSignal("dramsync"),
404 i_ECLK
=ClockSignal("sync2x"),
405 i_SCLK
=ClockSignal(),
412 Instance("TSHX2DQSA",
413 i_RST
=ResetSignal("dramsync"),
414 i_ECLK
=ClockSignal("sync2x"),
415 i_SCLK
=ClockSignal(),
417 i_T0
=~
(dqs_oe | dqs_postamble
),
418 i_T1
=~
(dqs_oe | dqs_preamble
),
424 io_B
=self
.pads
.dqs
.p
[i
]),
427 for j
in range(8*i
, 8*(i
+1)):
431 dq_i_delayed
= Signal()
432 dq_i_data
= Signal(4)
433 dq_o_data
= Signal(8)
434 dq_o_data_d
= Signal(8, reset_less
=True)
435 dq_o_data_muxed
= Signal(4, reset_less
=True)
436 m
.d
.comb
+= dq_o_data
.eq(Cat(
437 dfi
.phases
[0].wrdata
[0*databits
+j
],
438 dfi
.phases
[0].wrdata
[1*databits
+j
],
439 dfi
.phases
[0].wrdata
[2*databits
+j
],
440 dfi
.phases
[0].wrdata
[3*databits
+j
],
441 dfi
.phases
[1].wrdata
[0*databits
+j
],
442 dfi
.phases
[1].wrdata
[1*databits
+j
],
443 dfi
.phases
[1].wrdata
[2*databits
+j
],
444 dfi
.phases
[1].wrdata
[3*databits
+j
])
447 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
448 with m
.If(bl8_chunk
):
449 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
451 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
454 Instance("ODDRX2DQA",
455 i_RST
=ResetSignal("dramsync"),
456 i_ECLK
=ClockSignal("sync2x"),
457 i_SCLK
=ClockSignal(),
459 i_D0
=dq_o_data_muxed
[0],
460 i_D1
=dq_o_data_muxed
[1],
461 i_D2
=dq_o_data_muxed
[2],
462 i_D3
=dq_o_data_muxed
[3],
465 p_DEL_MODE
= "DQS_ALIGNED_X2",
468 Instance("IDDRX2DQA",
469 i_RST
=ResetSignal("dramsync"),
470 i_ECLK
=ClockSignal("sync2x"),
471 i_SCLK
=ClockSignal(),
485 i_RST
=ResetSignal("dramsync"),
486 i_ECLK
=ClockSignal("sync2x"),
487 i_SCLK
=ClockSignal(),
496 io_B
=self
.pads
.dq
.io
[j
])
498 with m
.If(~datavalid_prev
& datavalid
):
500 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
501 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
502 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
503 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
505 with m
.Elif(datavalid
):
507 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
508 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
509 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
510 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
513 # Read Control Path ------------------------------------------------------------------------
514 # Creates a shift register of read commands coming from the DFI interface. This shift register
515 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
516 # DFI interface that the read data is valid.
518 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
519 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
521 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
522 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
523 rddata_en
= Signal(self
.settings
.read_latency
)
524 rddata_en_last
= Signal
.like(rddata_en
)
525 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
526 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
527 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
529 rddata_valid
= Signal()
530 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
531 for phase
in dfi
.phases
:
532 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
534 # Write Control Path -----------------------------------------------------------------------
535 # Creates a shift register of write commands coming from the DFI interface. This shift register
536 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
537 # interface: the PHY operates in half-rate mode (so it provides 4 data words every sys_clk cycle:
538 # 2x for DDR, 2x for half-rate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
539 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
540 # FIXME: understand +2
541 wrdata_en
= Signal(cwl_sys_latency
+ 4)
542 wrdata_en_last
= Signal
.like(wrdata_en
)
543 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
544 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
545 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
546 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
547 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
549 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
550 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
551 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
552 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
553 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
554 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])