1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequested"
106 with m
.State("RdlyUpdateRequested"):
107 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
108 m
.next
= "ResetPause"
110 with m
.State("ResetPause"):
111 m
.d
.sync
+= self
.pause
.eq(0)
117 class ECP5DDRPHY(Peripheral
, Elaboratable
):
118 def __init__(self
, pads
, sys_clk_freq
=100e6
):
119 super().__init
__(name
="phy")
122 self
._sys
_clk
_freq
= sys_clk_freq
124 databits
= len(self
.pads
.dq
.io
)
125 if databits
% 8 != 0:
126 raise ValueError("DQ pads should come in a multiple of 8")
129 bank
= self
.csr_bank()
131 self
.burstdet
= bank
.csr(databits
//8, "rw")
134 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
135 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
137 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
138 self
.bus
= self
._bridge
.bus
140 addressbits
= len(self
.pads
.a
.o0
)
141 bankbits
= len(self
.pads
.ba
.o0
)
142 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
143 databits
= len(self
.pads
.dq
.io
)
144 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
146 # PHY settings -----------------------------------------------------------------------------
147 tck
= 1/(2*self
._sys
_clk
_freq
)
149 databits
= len(self
.pads
.dq
.io
)
150 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
151 cl
, cwl
= get_cl_cw("DDR3", tck
)
152 cl_sys_latency
= get_sys_latency(nphases
, cl
)
153 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
154 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
155 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
156 self
.settings
= PhySettings(
157 phytype
="ECP5DDRPHY",
160 dfi_databits
=4*databits
,
165 rdcmdphase
=rdcmdphase
,
166 wrcmdphase
=wrcmdphase
,
169 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
170 write_latency
=cwl_sys_latency
173 def elaborate(self
, platform
):
176 m
.submodules
.bridge
= self
._bridge
178 tck
= 1/(2*self
._sys
_clk
_freq
)
180 databits
= len(self
.pads
.dq
.io
)
182 burstdet_reg
= Signal(databits
//8, reset_less
=True)
183 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
186 with m
.If(self
.burstdet
.w_stb
):
187 m
.d
.sync
+= burstdet_reg
.eq(0)
189 # Init -------------------------------------------------------------------------------------
190 m
.submodules
.init
= init
= ECP5DDRPHYInit()
192 # Parameters -------------------------------------------------------------------------------
193 cl
, cwl
= get_cl_cw("DDR3", tck
)
194 cl_sys_latency
= get_sys_latency(nphases
, cl
)
195 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
197 # DFI Interface ----------------------------------------------------------------------------
202 # Clock --------------------------------------------------------------------------------
204 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
205 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
207 for i
in range(len(self
.pads
.clk
.o0
)):
209 self
.pads
.clk
.o0
[i
].eq(0),
210 self
.pads
.clk
.o1
[i
].eq(1),
211 self
.pads
.clk
.o2
[i
].eq(0),
212 self
.pads
.clk
.o3
[i
].eq(1),
215 # Addresses and Commands ---------------------------------------------------------------
217 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
218 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
219 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
220 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
222 for i
in range(len(self
.pads
.a
.o0
)):
224 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
225 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
226 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
227 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
229 for i
in range(len(self
.pads
.ba
.o0
)):
231 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
232 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
233 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
234 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
238 controls
= ["ras", "cas", "we", "clk_en", "odt"]
239 if hasattr(self
.pads
, "reset"):
240 controls
.append("reset")
241 if hasattr(self
.pads
, "cs"):
242 controls
.append("cs")
243 for name
in controls
:
245 getattr(self
.pads
, name
).o_clk
.eq(ClockSignal("dramsync")),
246 getattr(self
.pads
, name
).o_fclk
.eq(ClockSignal("sync2x")),
248 for i
in range(len(getattr(self
.pads
, name
).o0
)):
250 getattr(self
.pads
, name
).o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
251 getattr(self
.pads
, name
).o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
252 getattr(self
.pads
, name
).o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
253 getattr(self
.pads
, name
).o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
256 # DQ ---------------------------------------------------------------------------------------
260 dqs_postamble
= Signal()
261 dqs_preamble
= Signal()
262 for i
in range(databits
//8):
272 datavalid_prev
= Signal()
273 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
275 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
276 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
278 m
.submodules
+= Instance("DQSBUFM",
279 p_DQS_LI_DEL_ADJ
="MINUS",
281 p_DQS_LO_DEL_ADJ
="MINUS",
295 i_SCLK
=ClockSignal("sync"),
296 i_ECLK
=ClockSignal("sync2x"),
297 i_RST
=ResetSignal("dramsync"),
299 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
302 # Assert LOADNs to use DDRDEL control
310 # Reads (generate shifted DQS clock for reads)
313 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
314 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
315 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
325 o_DATAVALID
=datavalid
,
327 # Writes (generate shifted ECLK clock for writes)
331 with m
.If(Rose(burstdet
)):
332 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
334 # DQS and DM ---------------------------------------------------------------------------
335 dm_o_data
= Signal(8)
336 dm_o_data_d
= Signal(8, reset_less
=True)
337 dm_o_data_muxed
= Signal(4, reset_less
=True)
338 m
.d
.comb
+= dm_o_data
.eq(Cat(
339 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
340 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
341 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
342 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
344 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
345 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
346 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
347 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
349 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
351 with m
.If(bl8_chunk
):
352 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
354 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
356 m
.submodules
+= Instance("ODDRX2DQA",
357 i_RST
=ResetSignal("dramsync"),
358 i_ECLK
=ClockSignal("sync2x"),
359 i_SCLK
=ClockSignal("dramsync"),
361 i_D0
=dm_o_data_muxed
[0],
362 i_D1
=dm_o_data_muxed
[1],
363 i_D2
=dm_o_data_muxed
[2],
364 i_D3
=dm_o_data_muxed
[3],
365 o_Q
=self
.pads
.dm
.o
[i
])
370 Instance("ODDRX2DQSB",
371 i_RST
=ResetSignal("dramsync"),
372 i_ECLK
=ClockSignal("sync2x"),
373 i_SCLK
=ClockSignal(),
380 Instance("TSHX2DQSA",
381 i_RST
=ResetSignal("dramsync"),
382 i_ECLK
=ClockSignal("sync2x"),
383 i_SCLK
=ClockSignal(),
385 i_T0
=~
(dqs_oe | dqs_postamble
),
386 i_T1
=~
(dqs_oe | dqs_preamble
),
392 io_B
=self
.pads
.dqs
.p
[i
]),
395 for j
in range(8*i
, 8*(i
+1)):
399 dq_i_delayed
= Signal()
400 dq_i_data
= Signal(4)
401 dq_o_data
= Signal(8)
402 dq_o_data_d
= Signal(8, reset_less
=True)
403 dq_o_data_muxed
= Signal(4, reset_less
=True)
404 m
.d
.comb
+= dq_o_data
.eq(Cat(
405 dfi
.phases
[0].wrdata
[0*databits
+j
],
406 dfi
.phases
[0].wrdata
[1*databits
+j
],
407 dfi
.phases
[0].wrdata
[2*databits
+j
],
408 dfi
.phases
[0].wrdata
[3*databits
+j
],
409 dfi
.phases
[1].wrdata
[0*databits
+j
],
410 dfi
.phases
[1].wrdata
[1*databits
+j
],
411 dfi
.phases
[1].wrdata
[2*databits
+j
],
412 dfi
.phases
[1].wrdata
[3*databits
+j
])
415 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
416 with m
.If(bl8_chunk
):
417 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
419 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
422 Instance("ODDRX2DQA",
423 i_RST
=ResetSignal("dramsync"),
424 i_ECLK
=ClockSignal("sync2x"),
425 i_SCLK
=ClockSignal(),
427 i_D0
=dq_o_data_muxed
[0],
428 i_D1
=dq_o_data_muxed
[1],
429 i_D2
=dq_o_data_muxed
[2],
430 i_D3
=dq_o_data_muxed
[3],
433 p_DEL_MODE
="DQS_ALIGNED_X2",
439 Instance("IDDRX2DQA",
440 i_RST
=ResetSignal("dramsync"),
441 i_ECLK
=ClockSignal("sync2x"),
442 i_SCLK
=ClockSignal(),
456 i_RST
=ResetSignal("dramsync"),
457 i_ECLK
=ClockSignal("sync2x"),
458 i_SCLK
=ClockSignal(),
467 io_B
=self
.pads
.dq
.io
[j
])
469 with m
.If(~datavalid_prev
& datavalid
):
471 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
472 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
473 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
474 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
476 with m
.Elif(datavalid
):
478 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
479 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
480 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
481 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
484 # Read Control Path ------------------------------------------------------------------------
485 # Creates a shift register of read commands coming from the DFI interface. This shift register
486 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
487 # DFI interface that the read data is valid.
489 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
490 # the DRAM (see section 6.2.4, "READ Pulse Positioning Optimization", of FPGA-TN-02035-1.2).
492 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
493 # interface; the latency is the sum of the ODDRX2DQA, CAS, and IDDRX2DQA latencies.
494 rddata_en
= Signal(self
.settings
.read_latency
)
495 rddata_en_last
= Signal
.like(rddata_en
)
496 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
497 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
498 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
500 rddata_valid
= Signal()
501 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
502 for phase
in dfi
.phases
:
503 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
505 # Write Control Path -----------------------------------------------------------------------
506 # Creates a shift register of write commands coming from the DFI interface. This shift register
507 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
508 # interface: the PHY operates in half-rate mode (so it provides 4 data words every sys_clk cycle:
509 # 2x for DDR, 2x for half-rate), but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
510 # Writes are therefore performed over 2 sys_clk cycles, and the data must be selected for each cycle.
511 # FIXME: understand +2
512 wrdata_en
= Signal(cwl_sys_latency
+ 4)
513 wrdata_en_last
= Signal
.like(wrdata_en
)
514 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
515 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
516 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
517 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
518 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
520 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
521 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
522 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
523 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
524 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
525 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])