1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.lib
.cdc
import FFSynchronizer
13 from nmigen
.utils
import log2_int
15 from lambdasoc
.periph
import Peripheral
17 import gram
.stream
as stream
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
39 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
51 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
52 m
.d
.init
+= lock_d
.eq(lock
)
53 m
.d
.sync
+= new_lock
.eq(lock
& ~lock_d
)
55 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
58 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
59 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
60 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
61 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
62 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
63 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
64 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
65 (8*t
, [update
.eq(1)]), # Update DDRDLLA
66 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
67 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
71 m
.d
.comb
+= tl
.trigger
.eq(new_lock
)
73 m
.d
.comb
+= self
.delay
.eq(delay
)
78 class _DQSBUFMSettingManager(Elaboratable
):
79 def __init__(self
, rdly_csr
):
80 self
.rdly_csr
= rdly_csr
83 self
.readclksel
= Signal(3)
85 def elaborate(self
, platform
):
90 with m
.If(self
.rdly_csr
.w_stb
):
91 m
.d
.sync
+= self
.pause
.eq(1)
92 m
.next
= "RdlyUpdateRequested"
94 with m
.State("RdlyUpdateRequested"):
95 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
98 with m
.State("ResetPause"):
99 m
.d
.sync
+= self
.pause
.eq(0)
105 class ECP5DDRPHY(Peripheral
, Elaboratable
):
106 def __init__(self
, pads
, sys_clk_freq
=100e6
):
107 super().__init
__(name
="phy")
110 self
._sys
_clk
_freq
= sys_clk_freq
112 databits
= len(self
.pads
.dq
.io
)
113 if databits
% 8 != 0:
114 raise ValueError("DQ pads should come in a multiple of 8")
117 bank
= self
.csr_bank()
119 self
.burstdet
= bank
.csr(databits
//8, "rw")
122 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
123 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
125 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
126 self
.bus
= self
._bridge
.bus
128 addressbits
= len(self
.pads
.a
.o0
)
129 bankbits
= len(self
.pads
.ba
.o0
)
130 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
131 databits
= len(self
.pads
.dq
.io
)
132 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
134 # PHY settings -----------------------------------------------------------------------------
135 tck
= 1/(2*self
._sys
_clk
_freq
)
137 databits
= len(self
.pads
.dq
.io
)
138 nranks
= 1 if not hasattr(self
.pads
, "cs") else len(self
.pads
.cs
.o0
)
139 cl
, cwl
= get_cl_cw("DDR3", tck
)
140 cl_sys_latency
= get_sys_latency(nphases
, cl
)
141 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
142 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
143 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
144 self
.settings
= PhySettings(
145 phytype
="ECP5DDRPHY",
148 dfi_databits
=4*databits
,
153 rdcmdphase
=rdcmdphase
,
154 wrcmdphase
=wrcmdphase
,
157 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
158 write_latency
=cwl_sys_latency
161 def elaborate(self
, platform
):
164 m
.submodules
.bridge
= self
._bridge
166 tck
= 1/(2*self
._sys
_clk
_freq
)
168 databits
= len(self
.pads
.dq
.io
)
170 burstdet_reg
= Signal(databits
//8, reset_less
=True)
171 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
174 with m
.If(self
.burstdet
.w_stb
):
175 m
.d
.sync
+= burstdet_reg
.eq(0)
177 # Init -------------------------------------------------------------------------------------
178 m
.submodules
.init
= init
= ECP5DDRPHYInit()
180 # Parameters -------------------------------------------------------------------------------
181 cl
, cwl
= get_cl_cw("DDR3", tck
)
182 cl_sys_latency
= get_sys_latency(nphases
, cl
)
183 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
185 # DFI Interface ----------------------------------------------------------------------------
190 # Clock --------------------------------------------------------------------------------
192 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
193 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
195 for i
in range(len(self
.pads
.clk
.o0
)):
197 self
.pads
.clk
.o0
[i
].eq(0),
198 self
.pads
.clk
.o1
[i
].eq(1),
199 self
.pads
.clk
.o2
[i
].eq(0),
200 self
.pads
.clk
.o3
[i
].eq(1),
203 # Addresses and Commands ---------------------------------------------------------------
205 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
206 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
207 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
208 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
210 for i
in range(len(self
.pads
.a
.o0
)):
212 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
213 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
214 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
215 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
217 for i
in range(len(self
.pads
.ba
.o0
)):
219 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
220 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
221 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
222 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
226 controls
= ["ras", "cas", "we", "clk_en", "odt"]
227 if hasattr(self
.pads
, "reset"):
228 controls
.append("reset")
229 if hasattr(self
.pads
, "cs"):
230 controls
.append("cs")
231 for name
in controls
:
233 getattr(self
.pads
, name
).o_clk
.eq(ClockSignal("dramsync")),
234 getattr(self
.pads
, name
).o_fclk
.eq(ClockSignal("sync2x")),
236 for i
in range(len(getattr(self
.pads
, name
).o0
)):
238 getattr(self
.pads
, name
).o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
239 getattr(self
.pads
, name
).o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
240 getattr(self
.pads
, name
).o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
241 getattr(self
.pads
, name
).o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
244 # DQ ---------------------------------------------------------------------------------------
248 dqs_postamble
= Signal()
249 dqs_preamble
= Signal()
250 for i
in range(databits
//8):
260 datavalid_prev
= Signal()
261 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
263 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
264 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
266 m
.submodules
+= Instance("DQSBUFM",
267 p_DQS_LI_DEL_ADJ
="MINUS",
269 p_DQS_LO_DEL_ADJ
="MINUS",
283 i_SCLK
=ClockSignal("sync"),
284 i_ECLK
=ClockSignal("sync2x"),
285 i_RST
=ResetSignal("dramsync"),
287 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
290 # Assert LOADNs to use DDRDEL control
298 # Reads (generate shifted DQS clock for reads)
301 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
302 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
303 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
313 o_DATAVALID
=datavalid
,
315 # Writes (generate shifted ECLK clock for writes)
320 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
322 # DQS and DM ---------------------------------------------------------------------------
323 dm_o_data
= Signal(8)
324 dm_o_data_d
= Signal(8)
325 dm_o_data_muxed
= Signal(4)
326 m
.d
.comb
+= dm_o_data
.eq(Cat(
327 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
328 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
329 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
330 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
332 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
333 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
334 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
335 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
337 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
339 with m
.If(bl8_chunk
):
340 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
342 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
344 m
.submodules
+= Instance("ODDRX2DQA",
345 i_RST
=ResetSignal("dramsync"),
346 i_ECLK
=ClockSignal("sync2x"),
347 i_SCLK
=ClockSignal("dramsync"),
349 i_D0
=dm_o_data_muxed
[0],
350 i_D1
=dm_o_data_muxed
[1],
351 i_D2
=dm_o_data_muxed
[2],
352 i_D3
=dm_o_data_muxed
[3],
353 o_Q
=self
.pads
.dm
.o
[i
])
358 Instance("ODDRX2DQSB",
359 i_RST
=ResetSignal("dramsync"),
360 i_ECLK
=ClockSignal("sync2x"),
361 i_SCLK
=ClockSignal(),
368 Instance("TSHX2DQSA",
369 i_RST
=ResetSignal("dramsync"),
370 i_ECLK
=ClockSignal("sync2x"),
371 i_SCLK
=ClockSignal(),
373 i_T0
=~
(dqs_oe | dqs_postamble
),
374 i_T1
=~
(dqs_oe | dqs_preamble
),
380 io_B
=self
.pads
.dqs
.p
[i
]),
383 for j
in range(8*i
, 8*(i
+1)):
387 dq_i_delayed
= Signal()
388 dq_i_data
= Signal(4)
389 dq_o_data
= Signal(8)
390 dq_o_data_d
= Signal(8)
391 dq_o_data_muxed
= Signal(4)
392 m
.d
.comb
+= dq_o_data
.eq(Cat(
393 dfi
.phases
[0].wrdata
[0*databits
+j
],
394 dfi
.phases
[0].wrdata
[1*databits
+j
],
395 dfi
.phases
[0].wrdata
[2*databits
+j
],
396 dfi
.phases
[0].wrdata
[3*databits
+j
],
397 dfi
.phases
[1].wrdata
[0*databits
+j
],
398 dfi
.phases
[1].wrdata
[1*databits
+j
],
399 dfi
.phases
[1].wrdata
[2*databits
+j
],
400 dfi
.phases
[1].wrdata
[3*databits
+j
])
403 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
404 with m
.If(bl8_chunk
):
405 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
407 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
410 Instance("ODDRX2DQA",
411 i_RST
=ResetSignal("dramsync"),
412 i_ECLK
=ClockSignal("sync2x"),
413 i_SCLK
=ClockSignal(),
415 i_D0
=dq_o_data_muxed
[0],
416 i_D1
=dq_o_data_muxed
[1],
417 i_D2
=dq_o_data_muxed
[2],
418 i_D3
=dq_o_data_muxed
[3],
421 p_DEL_MODE
="DQS_ALIGNED_X2",
427 Instance("IDDRX2DQA",
428 i_RST
=ResetSignal("dramsync"),
429 i_ECLK
=ClockSignal("sync2x"),
430 i_SCLK
=ClockSignal(),
444 i_RST
=ResetSignal("dramsync"),
445 i_ECLK
=ClockSignal("sync2x"),
446 i_SCLK
=ClockSignal(),
455 io_B
=self
.pads
.dq
.io
[j
])
457 with m
.If(~datavalid_prev
& datavalid
):
459 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
460 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
461 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
462 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
464 with m
.Elif(datavalid
):
466 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
467 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
468 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
469 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
472 # Read Control Path ------------------------------------------------------------------------
473 # Creates a shift register of read commands coming from the DFI interface. This shift register
474 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
475 # DFI interface that the read data is valid.
477 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
478 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2).
480 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
481 # interface; the latency is the sum of the ODDRX2DQA, CAS and IDDRX2DQA latencies.
482 rddata_en
= Signal(self
.settings
.read_latency
)
483 rddata_en_last
= Signal
.like(rddata_en
)
484 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
485 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
486 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 0] | rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
488 rddata_valid
= Signal()
489 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
490 for phase
in dfi
.phases
:
491 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
493 # Write Control Path -----------------------------------------------------------------------
494 # Creates a shift register of write commands coming from the DFI interface. This shift register
495 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
496 # interface: The PHY operates in halfrate mode (so it provides 4 data words every sys_clk cycle:
497 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
498 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
499 # FIXME: understand +2
500 wrdata_en
= Signal(cwl_sys_latency
+ 4)
501 wrdata_en_last
= Signal
.like(wrdata_en
)
502 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
503 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
504 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
505 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
506 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
508 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
509 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
510 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
511 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
512 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
513 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])