e1d1147601001c19afd2c72a3c457ce92c4a0c2d
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("dramsync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DDQBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [ freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [ self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [ self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [ freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [ update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [ update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequestedDelay1"
106 with m
.State("RdlyUpdateRequestedDelay1"):
107 m
.next
= "RdlyUpdateRequestedDelay2"
109 with m
.State("RdlyUpdateRequestedDelay2"):
110 m
.next
= "RdlyUpdateRequestedDelay3"
112 with m
.State("RdlyUpdateRequestedDelay3"):
113 m
.next
= "RdlyUpdateRequested"
115 with m
.State("RdlyUpdateRequested"):
116 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
117 m
.next
= "ResetPauseDelay1"
119 with m
.State("ResetPauseDelay1"):
120 m
.next
= "ResetPauseDelay2"
122 with m
.State("ResetPauseDelay2"):
123 m
.next
= "ResetPauseDelay3"
125 with m
.State("ResetPauseDelay3"):
126 m
.next
= "ResetPause"
128 with m
.State("ResetPause"):
129 m
.d
.sync
+= self
.pause
.eq(0)
135 class ECP5DDRPHY(Peripheral
, Elaboratable
):
136 def __init__(self
, pads
, sys_clk_freq
=100e6
):
137 super().__init
__(name
="phy")
140 self
._sys
_clk
_freq
= sys_clk_freq
141 self
.init
= ECP5DDRPHYInit()
143 databits
= len(self
.pads
.dq
.io
)
144 if databits
% 8 != 0:
145 raise ValueError("DQ pads should come in a multiple of 8")
148 bank
= self
.csr_bank()
150 self
.burstdet
= bank
.csr(databits
//8, "rw")
153 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
154 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
155 self
.bitslip
= bank
.csr(3, "rw") # phase-delay on read
157 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
158 self
.bus
= self
._bridge
.bus
160 addressbits
= len(self
.pads
.a
.o0
)
161 bankbits
= len(self
.pads
.ba
.o0
)
163 if hasattr(self
.pads
, "cs_n") and hasattr(self
.pads
.cs_n
, "o0"):
164 nranks
= len(self
.pads
.cs_n
.o0
)
165 databits
= len(self
.pads
.dq
.io
)
166 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4,
169 # PHY settings -----------------------------------------------------------------------------
170 tck
= 1/(2*self
._sys
_clk
_freq
)
172 databits
= len(self
.pads
.dq
.io
)
173 cl
, cwl
= get_cl_cw("DDR3", tck
)
174 cl_sys_latency
= get_sys_latency(nphases
, cl
)
175 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
176 rdphase
= get_sys_phase(nphases
, cl_sys_latency
, cl
)
177 wrphase
= get_sys_phase(nphases
, cwl_sys_latency
, cwl
)
178 self
.settings
= PhySettings(
179 phytype
="ECP5DDRPHY",
182 dfi_databits
=4*databits
,
187 rdcmdphase
= (rdphase
- 1)%nphases
,
188 wrcmdphase
= (wrphase
- 1)%nphases
,
191 read_latency
= cl_sys_latency
+ 10,
192 write_latency
=cwl_sys_latency
195 def elaborate(self
, platform
):
198 m
.submodules
.bridge
= self
._bridge
200 tck
= 1/(2*self
._sys
_clk
_freq
)
202 databits
= len(self
.pads
.dq
.io
)
204 burstdet_reg
= Signal(databits
//8, reset_less
=True)
205 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
208 with m
.If(self
.burstdet
.w_stb
):
209 m
.d
.sync
+= burstdet_reg
.eq(0)
211 # Init -------------------------------------------------------------------------------------
212 m
.submodules
.init
= init
= self
.init
214 # Parameters -------------------------------------------------------------------------------
215 cl
, cwl
= get_cl_cw("DDR3", tck
)
216 cl_sys_latency
= get_sys_latency(nphases
, cl
)
217 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
219 # DFI Interface ----------------------------------------------------------------------------
224 # Clock --------------------------------------------------------------------------------
226 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
227 self
.pads
.clk
.o_prst
.eq(ResetSignal("dramsync")),
228 self
.pads
.clk
.o_fclk
.eq(ClockSignal("dramsync2x")),
230 for i
in range(len(self
.pads
.clk
.o0
)):
232 self
.pads
.clk
.o0
[i
].eq(0),
233 self
.pads
.clk
.o1
[i
].eq(1),
234 self
.pads
.clk
.o2
[i
].eq(0),
235 self
.pads
.clk
.o3
[i
].eq(1),
238 # Reset signal ------------------------
240 rst
= Signal(reset_less
=True)
241 drs
= ResetSignal("dramsync")
242 m
.d
.comb
+= rst
.eq(drs
)
243 #if hasattr(self.pads, "rst"):
246 # Addresses and Commands ---------------------------------------------------------------
248 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
249 self
.pads
.a
.o_prst
.eq(ResetSignal("dramsync")),
250 self
.pads
.a
.o_fclk
.eq(ClockSignal("dramsync2x")),
251 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
252 self
.pads
.ba
.o_fclk
.eq(ClockSignal("dramsync2x")),
254 for i
in range(len(self
.pads
.a
.o0
)):
256 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
257 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
258 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
259 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
261 for i
in range(len(self
.pads
.ba
.o0
)):
263 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
264 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
265 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
266 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
269 # Control pins: all of these have to be declared "xdr 4" when
270 # requesting the resource:
271 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
272 controls
= ["ras", "cas", "we", "clk_en", "odt"]
273 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
274 controls
.append("rst")
275 if hasattr(self
.pads
, "cs"):
276 controls
.append("cs")
277 for name
in controls
:
278 print ("clock", name
, getattr(self
.pads
, name
))
279 pad
= getattr(self
.pads
, name
)
280 # sigh, convention in nmigen_boards is "rst" but in
281 # dfi.Interface it is "reset"
282 dfi2pads
= {'rst': 'reset', 'cs': 'cs_n'}
283 name
= dfi2pads
.get(name
, name
) # remap if exists
285 pad
.o_clk
.eq(ClockSignal("dramsync")),
286 pad
.o_prst
.eq(ResetSignal("dramsync")),
287 pad
.o_fclk
.eq(ClockSignal("dramsync2x")),
290 # cs_n can't be directly connected to cs without
291 # being inverted first...
292 for i
in range(len(pad
.o0
)):
294 pad
.o0
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
295 pad
.o1
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
296 pad
.o2
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
297 pad
.o3
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
300 for i
in range(len(pad
.o0
)):
302 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
303 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
304 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
305 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
308 # DQ ---------------------------------------------------------------------------------------
312 dqs_postamble
= Signal()
313 dqs_preamble
= Signal()
314 for i
in range(databits
//8):
324 datavalid_prev
= Signal()
325 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
327 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
328 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
330 m
.submodules
+= Instance("DQSBUFM",
331 p_DQS_LI_DEL_ADJ
="MINUS",
333 p_DQS_LO_DEL_ADJ
="MINUS",
347 i_SCLK
=ClockSignal("sync"),
348 i_ECLK
=ClockSignal("dramsync2x"),
349 i_RST
=ResetSignal("dramsync"),
351 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
354 # Assert LOADNs to use DDRDEL control
362 # Reads (generate shifted DQS clock for reads)
365 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
366 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
367 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
377 o_DATAVALID
=datavalid
,
379 # Writes (generate shifted ECLK clock for writes)
383 with m
.If(Rose(burstdet
)):
384 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
386 # DQS and DM ---------------------------------------------------------------------------
387 dm_o_data
= Signal(8)
388 dm_o_data_d
= Signal(8, reset_less
=True)
389 dm_o_data_muxed
= Signal(4, reset_less
=True)
390 m
.d
.comb
+= dm_o_data
.eq(Cat(
391 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
392 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
393 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
394 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
396 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
397 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
398 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
399 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
401 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
403 with m
.If(bl8_chunk
):
404 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
406 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
408 m
.submodules
+= Instance("ODDRX2DQA",
409 i_RST
=ResetSignal("dramsync"),
410 i_ECLK
=ClockSignal("dramsync2x"),
411 i_SCLK
=ClockSignal("dramsync"),
413 i_D0
=dm_o_data_muxed
[0],
414 i_D1
=dm_o_data_muxed
[1],
415 i_D2
=dm_o_data_muxed
[2],
416 i_D3
=dm_o_data_muxed
[3],
417 o_Q
=self
.pads
.dm
.o
[i
])
422 Instance("ODDRX2DQSB",
423 i_RST
=ResetSignal("dramsync"),
424 i_ECLK
=ClockSignal("dramsync2x"),
425 i_SCLK
=ClockSignal(),
432 Instance("TSHX2DQSA",
433 i_RST
=ResetSignal("dramsync"),
434 i_ECLK
=ClockSignal("dramsync2x"),
435 i_SCLK
=ClockSignal(),
437 i_T0
=~
(dqs_oe | dqs_postamble
),
438 i_T1
=~
(dqs_oe | dqs_preamble
),
444 io_B
=self
.pads
.dqs
.p
[i
]),
447 for j
in range(8*i
, 8*(i
+1)):
451 dq_i_delayed
= Signal()
452 dq_i_data
= Signal(4)
453 dq_o_data
= Signal(8)
454 dq_o_data_d
= Signal(8, reset_less
=True)
455 dq_o_data_muxed
= Signal(4, reset_less
=True)
456 m
.d
.comb
+= dq_o_data
.eq(Cat(
457 dfi
.phases
[0].wrdata
[0*databits
+j
],
458 dfi
.phases
[0].wrdata
[1*databits
+j
],
459 dfi
.phases
[0].wrdata
[2*databits
+j
],
460 dfi
.phases
[0].wrdata
[3*databits
+j
],
461 dfi
.phases
[1].wrdata
[0*databits
+j
],
462 dfi
.phases
[1].wrdata
[1*databits
+j
],
463 dfi
.phases
[1].wrdata
[2*databits
+j
],
464 dfi
.phases
[1].wrdata
[3*databits
+j
])
467 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
468 with m
.If(bl8_chunk
):
469 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
471 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
474 Instance("ODDRX2DQA",
475 i_RST
=ResetSignal("dramsync"),
476 i_ECLK
=ClockSignal("dramsync2x"),
477 i_SCLK
=ClockSignal(),
479 i_D0
=dq_o_data_muxed
[0],
480 i_D1
=dq_o_data_muxed
[1],
481 i_D2
=dq_o_data_muxed
[2],
482 i_D3
=dq_o_data_muxed
[3],
485 p_DEL_MODE
= "DQS_ALIGNED_X2",
488 Instance("IDDRX2DQA",
489 i_RST
=ResetSignal("dramsync"),
490 i_ECLK
=ClockSignal("dramsync2x"),
491 i_SCLK
=ClockSignal(),
505 i_RST
=ResetSignal("dramsync"),
506 i_ECLK
=ClockSignal("dramsync2x"),
507 i_SCLK
=ClockSignal(),
516 io_B
=self
.pads
.dq
.io
[j
])
518 with m
.If(~datavalid_prev
& datavalid
):
520 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
521 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
522 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
523 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
525 with m
.Elif(datavalid
):
527 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
528 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
529 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
530 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
533 # Read Control Path ------------------------------------------------------------------------
534 # Creates a shift register of read commands coming from the DFI interface. This shift register
535 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
536 # DFI interface that the read data is valid.
538 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
539 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
541 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
542 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
543 rddata_en
= Signal(self
.settings
.read_latency
)
544 rddata_en_last
= Signal
.like(rddata_en
)
545 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
546 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
547 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
549 rddata_valid
= Signal()
550 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
551 for phase
in dfi
.phases
:
552 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
554 # Write Control Path -----------------------------------------------------------------------
555 # Creates a shift register of write commands coming from the DFI interface. This shift register
556 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
557 # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
558 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
559 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
560 # FIXME: understand the extra cycles (this note said +2, but the width below is cwl_sys_latency + 4)
561 wrdata_en
= Signal(cwl_sys_latency
+ 4)
562 wrdata_en_last
= Signal
.like(wrdata_en
)
563 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
564 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
565 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
566 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
567 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
569 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
570 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
571 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
572 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
573 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
574 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])