1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.lib
.cdc
import FFSynchronizer
13 from nmigen
.utils
import log2_int
15 from lambdasoc
.periph
import Peripheral
17 from gram
.common
import *
18 from gram
.phy
.dfi
import Interface
19 from gram
.compat
import Timeline
21 __all__
= ["ECP5DDRPHY"]
24 class ECP5DDRPHYInit(Elaboratable
):
31 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
41 m
.submodules
+= Instance("DDRDLLA",
42 i_CLK
=ClockSignal("sync2x"),
43 i_RST
=ResetSignal("init"),
50 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
51 m
.d
.init
+= lock_d
.eq(lock
)
52 m
.d
.sync
+= new_lock
.eq(lock
& ~lock_d
)
54 # DDRDLLA/DDQBUFM/ECLK initialization sequence ---------------------------------------------
57 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
58 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
59 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
60 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
61 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
62 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
63 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
64 (8*t
, [update
.eq(1)]), # Update DDRDLLA
65 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
66 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
70 m
.d
.comb
+= tl
.trigger
.eq(new_lock
)
72 m
.d
.comb
+= self
.delay
.eq(delay
)
77 class _DQSBUFMSettingManager(Elaboratable
):
78 """DQSBUFM setting manager.
80 The DQSBUFM primitive requires a very basic sequence when updating
81 read delay or other parameters. This elaboratable generates this
82 sequence from CSR events.
87 CSR storing the rdly value.
92 Pause signal for DQSBUFM.
93 readclksel : Signal(3), out
94 Readclksel signal for DQSBUFM.
96 def __init__(self
, rdly_csr
):
97 self
.rdly_csr
= rdly_csr
100 self
.readclksel
= Signal(3)
102 def elaborate(self
, platform
):
106 with m
.State("Idle"):
107 with m
.If(self
.rdly_csr
.w_stb
):
108 m
.d
.sync
+= self
.pause
.eq(1)
109 m
.next
= "RdlyUpdateRequested"
111 with m
.State("RdlyUpdateRequested"):
112 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
113 m
.next
= "ResetPause"
115 with m
.State("ResetPause"):
116 m
.d
.sync
+= self
.pause
.eq(0)
122 class ECP5DDRPHY(Peripheral
, Elaboratable
):
123 def __init__(self
, pads
, sys_clk_freq
=100e6
):
124 super().__init
__(name
="phy")
127 self
._sys
_clk
_freq
= sys_clk_freq
129 databits
= len(self
.pads
.dq
.io
)
130 if databits
% 8 != 0:
131 raise ValueError("DQ pads should come in a multiple of 8")
134 bank
= self
.csr_bank()
136 self
.burstdet
= bank
.csr(databits
//8, "rw")
139 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
140 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
142 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
143 self
.bus
= self
._bridge
.bus
145 addressbits
= len(self
.pads
.a
.o0
)
146 bankbits
= len(self
.pads
.ba
.o0
)
147 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
148 databits
= len(self
.pads
.dq
.io
)
149 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
151 # PHY settings -----------------------------------------------------------------------------
152 tck
= 1/(2*self
._sys
_clk
_freq
)
154 databits
= len(self
.pads
.dq
.io
)
155 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
156 cl
, cwl
= get_cl_cw("DDR3", tck
)
157 cl_sys_latency
= get_sys_latency(nphases
, cl
)
158 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
159 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
160 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
161 self
.settings
= PhySettings(
162 phytype
="ECP5DDRPHY",
165 dfi_databits
=4*databits
,
170 rdcmdphase
=rdcmdphase
,
171 wrcmdphase
=wrcmdphase
,
174 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
175 write_latency
=cwl_sys_latency
178 def elaborate(self
, platform
):
181 m
.submodules
.bridge
= self
._bridge
183 tck
= 1/(2*self
._sys
_clk
_freq
)
185 databits
= len(self
.pads
.dq
.io
)
187 burstdet_reg
= Signal(databits
//8, reset_less
=True)
188 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
191 with m
.If(self
.burstdet
.w_stb
):
192 m
.d
.sync
+= burstdet_reg
.eq(0)
194 # Init -------------------------------------------------------------------------------------
195 m
.submodules
.init
= init
= ECP5DDRPHYInit()
197 # Parameters -------------------------------------------------------------------------------
198 cl
, cwl
= get_cl_cw("DDR3", tck
)
199 cl_sys_latency
= get_sys_latency(nphases
, cl
)
200 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
202 # DFI Interface ----------------------------------------------------------------------------
207 # Clock --------------------------------------------------------------------------------
209 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
210 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
212 for i
in range(len(self
.pads
.clk
.o0
)):
214 self
.pads
.clk
.o0
[i
].eq(0),
215 self
.pads
.clk
.o1
[i
].eq(1),
216 self
.pads
.clk
.o2
[i
].eq(0),
217 self
.pads
.clk
.o3
[i
].eq(1),
220 # Addresses and Commands ---------------------------------------------------------------
222 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
223 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
224 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
225 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
227 for i
in range(len(self
.pads
.a
.o0
)):
229 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
230 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
231 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
232 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
234 for i
in range(len(self
.pads
.ba
.o0
)):
236 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
237 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
238 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
239 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
243 controls
= ["ras", "cas", "we", "clk_en", "odt"]
244 if hasattr(self
.pads
, "reset"):
245 controls
.append("reset")
246 if hasattr(self
.pads
, "cs"):
247 controls
.append("cs")
248 for name
in controls
:
250 getattr(self
.pads
, name
).o_clk
.eq(ClockSignal("dramsync")),
251 getattr(self
.pads
, name
).o_fclk
.eq(ClockSignal("sync2x")),
253 for i
in range(len(getattr(self
.pads
, name
).o0
)):
255 getattr(self
.pads
, name
).o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
256 getattr(self
.pads
, name
).o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
257 getattr(self
.pads
, name
).o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
258 getattr(self
.pads
, name
).o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
261 # DQ ---------------------------------------------------------------------------------------
265 dqs_postamble
= Signal()
266 dqs_preamble
= Signal()
267 for i
in range(databits
//8):
277 datavalid_prev
= Signal()
278 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
280 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
281 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
283 m
.submodules
+= Instance("DQSBUFM",
284 p_DQS_LI_DEL_ADJ
="MINUS",
286 p_DQS_LO_DEL_ADJ
="MINUS",
300 i_SCLK
=ClockSignal("sync"),
301 i_ECLK
=ClockSignal("sync2x"),
302 i_RST
=ResetSignal("dramsync"),
304 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
307 # Assert LOADNs to use DDRDEL control
315 # Reads (generate shifted DQS clock for reads)
318 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
319 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
320 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
330 o_DATAVALID
=datavalid
,
332 # Writes (generate shifted ECLK clock for writes)
337 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
339 # DQS and DM ---------------------------------------------------------------------------
340 dm_o_data
= Signal(8)
341 dm_o_data_d
= Signal(8)
342 dm_o_data_muxed
= Signal(4)
343 m
.d
.comb
+= dm_o_data
.eq(Cat(
344 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
345 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
346 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
347 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
349 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
350 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
351 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
352 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
354 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
356 with m
.If(bl8_chunk
):
357 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
359 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
361 m
.submodules
+= Instance("ODDRX2DQA",
362 i_RST
=ResetSignal("dramsync"),
363 i_ECLK
=ClockSignal("sync2x"),
364 i_SCLK
=ClockSignal("dramsync"),
366 i_D0
=dm_o_data_muxed
[0],
367 i_D1
=dm_o_data_muxed
[1],
368 i_D2
=dm_o_data_muxed
[2],
369 i_D3
=dm_o_data_muxed
[3],
370 o_Q
=self
.pads
.dm
.o
[i
])
375 Instance("ODDRX2DQSB",
376 i_RST
=ResetSignal("dramsync"),
377 i_ECLK
=ClockSignal("sync2x"),
378 i_SCLK
=ClockSignal(),
385 Instance("TSHX2DQSA",
386 i_RST
=ResetSignal("dramsync"),
387 i_ECLK
=ClockSignal("sync2x"),
388 i_SCLK
=ClockSignal(),
390 i_T0
=~
(dqs_oe | dqs_postamble
),
391 i_T1
=~
(dqs_oe | dqs_preamble
),
397 io_B
=self
.pads
.dqs
.p
[i
]),
400 for j
in range(8*i
, 8*(i
+1)):
404 dq_i_delayed
= Signal()
405 dq_i_data
= Signal(4)
406 dq_o_data
= Signal(8)
407 dq_o_data_d
= Signal(8)
408 dq_o_data_muxed
= Signal(4)
409 m
.d
.comb
+= dq_o_data
.eq(Cat(
410 dfi
.phases
[0].wrdata
[0*databits
+j
],
411 dfi
.phases
[0].wrdata
[1*databits
+j
],
412 dfi
.phases
[0].wrdata
[2*databits
+j
],
413 dfi
.phases
[0].wrdata
[3*databits
+j
],
414 dfi
.phases
[1].wrdata
[0*databits
+j
],
415 dfi
.phases
[1].wrdata
[1*databits
+j
],
416 dfi
.phases
[1].wrdata
[2*databits
+j
],
417 dfi
.phases
[1].wrdata
[3*databits
+j
])
420 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
421 with m
.If(bl8_chunk
):
422 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
424 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
427 Instance("ODDRX2DQA",
428 i_RST
=ResetSignal("dramsync"),
429 i_ECLK
=ClockSignal("sync2x"),
430 i_SCLK
=ClockSignal(),
432 i_D0
=dq_o_data_muxed
[0],
433 i_D1
=dq_o_data_muxed
[1],
434 i_D2
=dq_o_data_muxed
[2],
435 i_D3
=dq_o_data_muxed
[3],
438 p_DEL_MODE
="DQS_ALIGNED_X2",
444 Instance("IDDRX2DQA",
445 i_RST
=ResetSignal("dramsync"),
446 i_ECLK
=ClockSignal("sync2x"),
447 i_SCLK
=ClockSignal(),
461 i_RST
=ResetSignal("dramsync"),
462 i_ECLK
=ClockSignal("sync2x"),
463 i_SCLK
=ClockSignal(),
472 io_B
=self
.pads
.dq
.io
[j
])
474 with m
.If(~datavalid_prev
& datavalid
):
476 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
477 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
478 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
479 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
481 with m
.Elif(datavalid
):
483 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
484 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
485 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
486 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
489 # Read Control Path ------------------------------------------------------------------------
490 # Creates a shift register of read commands coming from the DFI interface. This shift register
491 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
492 # DFI interface that the read data is valid.
494 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
495 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
497 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
498 # interface; the latency is the sum of the ODDRX2DQA, CAS and IDDRX2DQA latencies.
499 rddata_en
= Signal(self
.settings
.read_latency
)
500 rddata_en_last
= Signal
.like(rddata_en
)
501 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
502 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
503 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 0] | rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
505 rddata_valid
= Signal()
506 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
507 for phase
in dfi
.phases
:
508 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
510 # Write Control Path -----------------------------------------------------------------------
511 # Creates a shift register of write commands coming from the DFI interface. This shift register
512 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
513 # interface: the PHY operates in halfrate mode (so it provides 4 data words every sys_clk cycle:
514 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
515 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
516 # FIXME: understand +2
517 wrdata_en
= Signal(cwl_sys_latency
+ 4)
518 wrdata_en_last
= Signal
.like(wrdata_en
)
519 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
520 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
521 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
522 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
523 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
525 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
526 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
527 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
528 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
529 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
530 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])