007c143cba8c2c7ba3209246e549ad08d44ade77
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [ freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [ self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [ self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [ freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [ update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [ update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequestedDelay1"
106 with m
.State("RdlyUpdateRequestedDelay1"):
107 m
.next
= "RdlyUpdateRequestedDelay2"
109 with m
.State("RdlyUpdateRequestedDelay2"):
110 m
.next
= "RdlyUpdateRequestedDelay3"
112 with m
.State("RdlyUpdateRequestedDelay3"):
113 m
.next
= "RdlyUpdateRequested"
115 with m
.State("RdlyUpdateRequested"):
116 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
117 m
.next
= "ResetPauseDelay1"
119 with m
.State("ResetPauseDelay1"):
120 m
.next
= "ResetPauseDelay2"
122 with m
.State("ResetPauseDelay2"):
123 m
.next
= "ResetPauseDelay3"
125 with m
.State("ResetPauseDelay3"):
126 m
.next
= "ResetPause"
128 with m
.State("ResetPause"):
129 m
.d
.sync
+= self
.pause
.eq(0)
135 class ECP5DDRPHY(Peripheral
, Elaboratable
):
136 def __init__(self
, pads
, sys_clk_freq
=100e6
):
137 super().__init
__(name
="phy")
140 self
._sys
_clk
_freq
= sys_clk_freq
141 self
.init
= ECP5DDRPHYInit()
143 databits
= len(self
.pads
.dq
.io
)
144 if databits
% 8 != 0:
145 raise ValueError("DQ pads should come in a multiple of 8")
148 bank
= self
.csr_bank()
150 self
.burstdet
= bank
.csr(databits
//8, "rw")
153 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
154 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
155 self
.bitslip
= bank
.csr(3, "rw") # phase-delay on read
157 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
158 self
.bus
= self
._bridge
.bus
160 addressbits
= len(self
.pads
.a
.o0
)
161 bankbits
= len(self
.pads
.ba
.o0
)
163 if hasattr(self
.pads
, "cs_n") and hasattr(self
.pads
.cs_n
, "o0"):
164 nranks
= len(self
.pads
.cs_n
.o0
)
165 databits
= len(self
.pads
.dq
.io
)
166 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4,
169 # PHY settings -----------------------------------------------------------------------------
170 tck
= 1/(2*self
._sys
_clk
_freq
)
172 databits
= len(self
.pads
.dq
.io
)
173 cl
, cwl
= get_cl_cw("DDR3", tck
)
174 cl_sys_latency
= get_sys_latency(nphases
, cl
)
175 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
176 rdphase
= get_sys_phase(nphases
, cl_sys_latency
, cl
)
177 wrphase
= get_sys_phase(nphases
, cwl_sys_latency
, cwl
)
178 self
.settings
= PhySettings(
179 phytype
="ECP5DDRPHY",
182 dfi_databits
=4*databits
,
187 rdcmdphase
= (rdphase
- 1)%nphases
,
188 wrcmdphase
= (wrphase
- 1)%nphases
,
191 read_latency
= cl_sys_latency
+ 10,
192 write_latency
=cwl_sys_latency
195 def elaborate(self
, platform
):
198 m
.submodules
.bridge
= self
._bridge
200 tck
= 1/(2*self
._sys
_clk
_freq
)
202 databits
= len(self
.pads
.dq
.io
)
204 burstdet_reg
= Signal(databits
//8, reset_less
=True)
205 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
208 with m
.If(self
.burstdet
.w_stb
):
209 m
.d
.sync
+= burstdet_reg
.eq(0)
211 # Init -------------------------------------------------------------------------------------
212 m
.submodules
.init
= init
= self
.init
214 # Parameters -------------------------------------------------------------------------------
215 cl
, cwl
= get_cl_cw("DDR3", tck
)
216 cl_sys_latency
= get_sys_latency(nphases
, cl
)
217 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
219 # DFI Interface ----------------------------------------------------------------------------
224 # Clock --------------------------------------------------------------------------------
226 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
227 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
229 for i
in range(len(self
.pads
.clk
.o0
)):
231 self
.pads
.clk
.o0
[i
].eq(0),
232 self
.pads
.clk
.o1
[i
].eq(1),
233 self
.pads
.clk
.o2
[i
].eq(0),
234 self
.pads
.clk
.o3
[i
].eq(1),
237 # Addresses and Commands ---------------------------------------------------------------
239 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
240 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
241 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
242 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
244 for i
in range(len(self
.pads
.a
.o0
)):
246 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
247 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
248 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
249 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
251 for i
in range(len(self
.pads
.ba
.o0
)):
253 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
254 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
255 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
256 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
259 # Control pins: all of these have to be declared "xdr 4" when
260 # requesting the resource:
261 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
262 controls
= ["ras", "cas", "we", "clk_en", "odt"]
263 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
264 controls
.append("rst")
265 if hasattr(self
.pads
, "cs"):
266 controls
.append("cs")
267 for name
in controls
:
268 print ("clock", name
, getattr(self
.pads
, name
))
269 pad
= getattr(self
.pads
, name
)
270 # sigh, convention in nmigen_boards is "rst" but in
271 # dfi.Interface it is "reset"
272 dfi2pads
= {'rst': 'reset', 'cs': 'cs_n'}
273 name
= dfi2pads
.get(name
, name
) # remap if exists
276 pad
.o_clk
.eq(ClockSignal("sync")),
280 pad
.o_clk
.eq(ClockSignal("dramsync")),
281 pad
.o_fclk
.eq(ClockSignal("sync2x")),
284 for i
in range(len(pad
.o
)):
286 pad
.o
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
289 # cs_n can't be directly connected to cs without being inverted first...
290 for i
in range(len(pad
.o0
)):
292 pad
.o0
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
293 pad
.o1
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
294 pad
.o2
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
295 pad
.o3
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
298 for i
in range(len(pad
.o0
)):
300 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
301 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
302 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
303 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
306 # DQ ---------------------------------------------------------------------------------------
310 dqs_postamble
= Signal()
311 dqs_preamble
= Signal()
312 for i
in range(databits
//8):
322 datavalid_prev
= Signal()
323 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
325 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
326 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
328 m
.submodules
+= Instance("DQSBUFM",
329 p_DQS_LI_DEL_ADJ
="MINUS",
331 p_DQS_LO_DEL_ADJ
="MINUS",
345 i_SCLK
=ClockSignal("sync"),
346 i_ECLK
=ClockSignal("sync2x"),
347 i_RST
=ResetSignal("dramsync"),
349 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
352 # Assert LOADNs to use DDRDEL control
360 # Reads (generate shifted DQS clock for reads)
363 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
364 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
365 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
375 o_DATAVALID
=datavalid
,
377 # Writes (generate shifted ECLK clock for writes)
381 with m
.If(Rose(burstdet
)):
382 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
384 # DQS and DM ---------------------------------------------------------------------------
385 dm_o_data
= Signal(8)
386 dm_o_data_d
= Signal(8, reset_less
=True)
387 dm_o_data_muxed
= Signal(4, reset_less
=True)
388 m
.d
.comb
+= dm_o_data
.eq(Cat(
389 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
390 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
391 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
392 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
394 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
395 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
396 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
397 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
399 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
401 with m
.If(bl8_chunk
):
402 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
404 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
406 m
.submodules
+= Instance("ODDRX2DQA",
407 i_RST
=ResetSignal("dramsync"),
408 i_ECLK
=ClockSignal("sync2x"),
409 i_SCLK
=ClockSignal("dramsync"),
411 i_D0
=dm_o_data_muxed
[0],
412 i_D1
=dm_o_data_muxed
[1],
413 i_D2
=dm_o_data_muxed
[2],
414 i_D3
=dm_o_data_muxed
[3],
415 o_Q
=self
.pads
.dm
.o
[i
])
420 Instance("ODDRX2DQSB",
421 i_RST
=ResetSignal("dramsync"),
422 i_ECLK
=ClockSignal("sync2x"),
423 i_SCLK
=ClockSignal(),
430 Instance("TSHX2DQSA",
431 i_RST
=ResetSignal("dramsync"),
432 i_ECLK
=ClockSignal("sync2x"),
433 i_SCLK
=ClockSignal(),
435 i_T0
=~
(dqs_oe | dqs_postamble
),
436 i_T1
=~
(dqs_oe | dqs_preamble
),
442 io_B
=self
.pads
.dqs
.p
[i
]),
445 for j
in range(8*i
, 8*(i
+1)):
449 dq_i_delayed
= Signal()
450 dq_i_data
= Signal(4)
451 dq_o_data
= Signal(8)
452 dq_o_data_d
= Signal(8, reset_less
=True)
453 dq_o_data_muxed
= Signal(4, reset_less
=True)
454 m
.d
.comb
+= dq_o_data
.eq(Cat(
455 dfi
.phases
[0].wrdata
[0*databits
+j
],
456 dfi
.phases
[0].wrdata
[1*databits
+j
],
457 dfi
.phases
[0].wrdata
[2*databits
+j
],
458 dfi
.phases
[0].wrdata
[3*databits
+j
],
459 dfi
.phases
[1].wrdata
[0*databits
+j
],
460 dfi
.phases
[1].wrdata
[1*databits
+j
],
461 dfi
.phases
[1].wrdata
[2*databits
+j
],
462 dfi
.phases
[1].wrdata
[3*databits
+j
])
465 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
466 with m
.If(bl8_chunk
):
467 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
469 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
472 Instance("ODDRX2DQA",
473 i_RST
=ResetSignal("dramsync"),
474 i_ECLK
=ClockSignal("sync2x"),
475 i_SCLK
=ClockSignal(),
477 i_D0
=dq_o_data_muxed
[0],
478 i_D1
=dq_o_data_muxed
[1],
479 i_D2
=dq_o_data_muxed
[2],
480 i_D3
=dq_o_data_muxed
[3],
483 p_DEL_MODE
= "DQS_ALIGNED_X2",
486 Instance("IDDRX2DQA",
487 i_RST
=ResetSignal("dramsync"),
488 i_ECLK
=ClockSignal("sync2x"),
489 i_SCLK
=ClockSignal(),
503 i_RST
=ResetSignal("dramsync"),
504 i_ECLK
=ClockSignal("sync2x"),
505 i_SCLK
=ClockSignal(),
514 io_B
=self
.pads
.dq
.io
[j
])
516 with m
.If(~datavalid_prev
& datavalid
):
518 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
519 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
520 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
521 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
523 with m
.Elif(datavalid
):
525 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
526 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
527 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
528 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
531 # Read Control Path ------------------------------------------------------------------------
532 # Creates a shift register of read commands coming from the DFI interface. This shift register
533 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
534 # DFI interface that the read data is valid.
536 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
537 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
539 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
540 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
541 rddata_en
= Signal(self
.settings
.read_latency
)
542 rddata_en_last
= Signal
.like(rddata_en
)
543 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
544 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
545 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
547 rddata_valid
= Signal()
548 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
549 for phase
in dfi
.phases
:
550 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
552 # Write Control Path -----------------------------------------------------------------------
553 # Creates a shift register of write commands coming from the DFI interface. This shift register
554 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
555 # interface: The PHY operates in half-rate mode (so it provides 4 data transfers every sys_clk cycle:
556 # 2x for DDR, 2x for half-rate) but DDR3 requires a burst of 8 data transfers (BL8) for best efficiency.
557 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
558 # FIXME: understand +2
559 wrdata_en
= Signal(cwl_sys_latency
+ 4)
560 wrdata_en_last
= Signal
.like(wrdata_en
)
561 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
562 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
563 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
564 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
565 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
567 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
568 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
569 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
570 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
571 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
572 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])