1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.hdl
.ast
import Rose
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 __all__
= ["ECP5DDRPHY"]
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
38 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
49 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
50 m
.d
.init
+= lock_d
.eq(lock
)
52 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
55 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
56 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
57 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
58 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
59 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
60 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
61 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
62 (8*t
, [update
.eq(1)]), # Update DDRDLLA
63 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
64 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
66 m
.d
.comb
+= tl
.trigger
.eq(lock
& ~lock_d
) # Trigger timeline on lock rising edge
67 m
.submodules
+= DomainRenamer("init")(tl
)
72 class _DQSBUFMSettingManager(Elaboratable
):
73 """DQSBUFM setting manager.
75 The DQSBUFM primitive requires a very basic sequence when updating
76 read delay or other parameters. This elaboratable generates this
77 sequence from CSR events.
82 CSR storing the rdly value.
87 Pause signal for DQSBUFM.
88 readclksel : Signal(3), out
89 Readclksel signal for DQSBUFM.
91 def __init__(self
, rdly_csr
):
92 self
.rdly_csr
= rdly_csr
95 self
.readclksel
= Signal(3)
97 def elaborate(self
, platform
):
101 with m
.State("Idle"):
102 with m
.If(self
.rdly_csr
.w_stb
):
103 m
.d
.sync
+= self
.pause
.eq(1)
104 m
.next
= "RdlyUpdateRequested"
106 with m
.State("RdlyUpdateRequested"):
107 m
.d
.sync
+= self
.readclksel
.eq(self
.rdly_csr
.w_data
)
108 m
.next
= "ResetPause"
110 with m
.State("ResetPause"):
111 m
.d
.sync
+= self
.pause
.eq(0)
117 class ECP5DDRPHY(Peripheral
, Elaboratable
):
118 def __init__(self
, pads
, sys_clk_freq
=100e6
):
119 super().__init
__(name
="phy")
122 self
._sys
_clk
_freq
= sys_clk_freq
124 databits
= len(self
.pads
.dq
.io
)
125 if databits
% 8 != 0:
126 raise ValueError("DQ pads should come in a multiple of 8")
129 bank
= self
.csr_bank()
131 self
.burstdet
= bank
.csr(databits
//8, "rw")
134 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p0")]
135 self
.rdly
+= [bank
.csr(3, "rw", name
="rdly_p1")]
136 self
.bitslip
= bank
.csr(3, "rw") # phase-delay on read
138 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
139 self
.bus
= self
._bridge
.bus
141 addressbits
= len(self
.pads
.a
.o0
)
142 bankbits
= len(self
.pads
.ba
.o0
)
144 if hasattr(self
.pads
, "cs_n") and hasattr(self
.pads
.cs_n
, "o0"):
145 nranks
= len(self
.pads
.cs_n
.o0
)
146 databits
= len(self
.pads
.dq
.io
)
147 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4,
150 # PHY settings -----------------------------------------------------------------------------
151 tck
= 1/(2*self
._sys
_clk
_freq
)
153 databits
= len(self
.pads
.dq
.io
)
154 cl
, cwl
= get_cl_cw("DDR3", tck
)
155 cl_sys_latency
= get_sys_latency(nphases
, cl
)
156 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
157 rdphase
= get_sys_phase(nphases
, cl_sys_latency
, cl
)
158 wrphase
= get_sys_phase(nphases
, cwl_sys_latency
, cwl
)
159 self
.settings
= PhySettings(
160 phytype
="ECP5DDRPHY",
163 dfi_databits
=4*databits
,
168 rdcmdphase
= (rdphase
- 1)%nphases
,
169 wrcmdphase
= (wrphase
- 1)%nphases
,
172 read_latency
= cl_sys_latency
+ 10,
173 write_latency
=cwl_sys_latency
176 def elaborate(self
, platform
):
179 m
.submodules
.bridge
= self
._bridge
181 tck
= 1/(2*self
._sys
_clk
_freq
)
183 databits
= len(self
.pads
.dq
.io
)
185 burstdet_reg
= Signal(databits
//8, reset_less
=True)
186 m
.d
.comb
+= self
.burstdet
.r_data
.eq(burstdet_reg
)
189 with m
.If(self
.burstdet
.w_stb
):
190 m
.d
.sync
+= burstdet_reg
.eq(0)
192 # Init -------------------------------------------------------------------------------------
193 m
.submodules
.init
= init
= ECP5DDRPHYInit()
195 # Parameters -------------------------------------------------------------------------------
196 cl
, cwl
= get_cl_cw("DDR3", tck
)
197 cl_sys_latency
= get_sys_latency(nphases
, cl
)
198 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
200 # DFI Interface ----------------------------------------------------------------------------
205 # Clock --------------------------------------------------------------------------------
207 self
.pads
.clk
.o_clk
.eq(ClockSignal("dramsync")),
208 self
.pads
.clk
.o_fclk
.eq(ClockSignal("sync2x")),
210 for i
in range(len(self
.pads
.clk
.o0
)):
212 self
.pads
.clk
.o0
[i
].eq(0),
213 self
.pads
.clk
.o1
[i
].eq(1),
214 self
.pads
.clk
.o2
[i
].eq(0),
215 self
.pads
.clk
.o3
[i
].eq(1),
218 # Addresses and Commands ---------------------------------------------------------------
220 self
.pads
.a
.o_clk
.eq(ClockSignal("dramsync")),
221 self
.pads
.a
.o_fclk
.eq(ClockSignal("sync2x")),
222 self
.pads
.ba
.o_clk
.eq(ClockSignal("dramsync")),
223 self
.pads
.ba
.o_fclk
.eq(ClockSignal("sync2x")),
225 for i
in range(len(self
.pads
.a
.o0
)):
227 self
.pads
.a
.o0
[i
].eq(dfi
.phases
[0].address
[i
]),
228 self
.pads
.a
.o1
[i
].eq(dfi
.phases
[0].address
[i
]),
229 self
.pads
.a
.o2
[i
].eq(dfi
.phases
[1].address
[i
]),
230 self
.pads
.a
.o3
[i
].eq(dfi
.phases
[1].address
[i
]),
232 for i
in range(len(self
.pads
.ba
.o0
)):
234 self
.pads
.ba
.o0
[i
].eq(dfi
.phases
[0].bank
[i
]),
235 self
.pads
.ba
.o1
[i
].eq(dfi
.phases
[0].bank
[i
]),
236 self
.pads
.ba
.o2
[i
].eq(dfi
.phases
[1].bank
[i
]),
237 self
.pads
.ba
.o3
[i
].eq(dfi
.phases
[1].bank
[i
]),
240 # Control pins: all of these have to be declared "xdr 4" when
241 # requesting the resource:
242 # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... })
243 controls
= ["ras", "cas", "we", "clk_en", "odt"]
244 if hasattr(self
.pads
, "rst"): # this gets renamed later to match dfi
245 controls
.append("rst")
246 if hasattr(self
.pads
, "cs"):
247 controls
.append("cs")
248 for name
in controls
:
249 print ("clock", name
, getattr(self
.pads
, name
))
250 pad
= getattr(self
.pads
, name
)
251 # sigh, convention in nmigen_boards is "rst" but in
252 # dfi.Interface it is "reset"
253 dfi2pads
= {'rst': 'reset', 'cs': 'cs_n'}
254 name
= dfi2pads
.get(name
, name
) # remap if exists
257 pad
.o_clk
.eq(ClockSignal("sync")),
261 pad
.o_clk
.eq(ClockSignal("dramsync")),
262 pad
.o_fclk
.eq(ClockSignal("sync2x")),
265 for i
in range(len(pad
.o
)):
267 pad
.o
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
270 # cs_n can't be directly connected to cs without being inverted first...
271 for i
in range(len(pad
.o0
)):
273 pad
.o0
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
274 pad
.o1
[i
].eq(~
getattr(dfi
.phases
[0], name
)[i
]),
275 pad
.o2
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
276 pad
.o3
[i
].eq(~
getattr(dfi
.phases
[1], name
)[i
]),
279 for i
in range(len(pad
.o0
)):
281 pad
.o0
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
282 pad
.o1
[i
].eq(getattr(dfi
.phases
[0], name
)[i
]),
283 pad
.o2
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
284 pad
.o3
[i
].eq(getattr(dfi
.phases
[1], name
)[i
]),
287 # DQ ---------------------------------------------------------------------------------------
291 dqs_postamble
= Signal()
292 dqs_preamble
= Signal()
293 for i
in range(databits
//8):
303 datavalid_prev
= Signal()
304 m
.d
.sync
+= datavalid_prev
.eq(datavalid
)
306 dqsbufm_manager
= _DQSBUFMSettingManager(self
.rdly
[i
])
307 setattr(m
.submodules
, f
"dqsbufm_manager{i}", dqsbufm_manager
)
309 m
.submodules
+= Instance("DQSBUFM",
310 p_DQS_LI_DEL_ADJ
="MINUS",
312 p_DQS_LO_DEL_ADJ
="MINUS",
326 i_SCLK
=ClockSignal("sync"),
327 i_ECLK
=ClockSignal("sync2x"),
328 i_RST
=ResetSignal("dramsync"),
330 i_PAUSE
=init
.pause | dqsbufm_manager
.pause
,
333 # Assert LOADNs to use DDRDEL control
341 # Reads (generate shifted DQS clock for reads)
344 i_READCLKSEL0
=dqsbufm_manager
.readclksel
[0],
345 i_READCLKSEL1
=dqsbufm_manager
.readclksel
[1],
346 i_READCLKSEL2
=dqsbufm_manager
.readclksel
[2],
356 o_DATAVALID
=datavalid
,
358 # Writes (generate shifted ECLK clock for writes)
362 with m
.If(Rose(burstdet
)):
363 m
.d
.sync
+= burstdet_reg
[i
].eq(1)
365 # DQS and DM ---------------------------------------------------------------------------
366 dm_o_data
= Signal(8)
367 dm_o_data_d
= Signal(8, reset_less
=True)
368 dm_o_data_muxed
= Signal(4, reset_less
=True)
369 m
.d
.comb
+= dm_o_data
.eq(Cat(
370 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
371 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
372 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
373 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
375 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
376 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
377 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
378 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
380 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
382 with m
.If(bl8_chunk
):
383 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
385 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
387 m
.submodules
+= Instance("ODDRX2DQA",
388 i_RST
=ResetSignal("dramsync"),
389 i_ECLK
=ClockSignal("sync2x"),
390 i_SCLK
=ClockSignal("dramsync"),
392 i_D0
=dm_o_data_muxed
[0],
393 i_D1
=dm_o_data_muxed
[1],
394 i_D2
=dm_o_data_muxed
[2],
395 i_D3
=dm_o_data_muxed
[3],
396 o_Q
=self
.pads
.dm
.o
[i
])
401 Instance("ODDRX2DQSB",
402 i_RST
=ResetSignal("dramsync"),
403 i_ECLK
=ClockSignal("sync2x"),
404 i_SCLK
=ClockSignal(),
411 Instance("TSHX2DQSA",
412 i_RST
=ResetSignal("dramsync"),
413 i_ECLK
=ClockSignal("sync2x"),
414 i_SCLK
=ClockSignal(),
416 i_T0
=~
(dqs_oe | dqs_postamble
),
417 i_T1
=~
(dqs_oe | dqs_preamble
),
423 io_B
=self
.pads
.dqs
.p
[i
]),
426 for j
in range(8*i
, 8*(i
+1)):
430 dq_i_delayed
= Signal()
431 dq_i_data
= Signal(4)
432 dq_o_data
= Signal(8)
433 dq_o_data_d
= Signal(8, reset_less
=True)
434 dq_o_data_muxed
= Signal(4, reset_less
=True)
435 m
.d
.comb
+= dq_o_data
.eq(Cat(
436 dfi
.phases
[0].wrdata
[0*databits
+j
],
437 dfi
.phases
[0].wrdata
[1*databits
+j
],
438 dfi
.phases
[0].wrdata
[2*databits
+j
],
439 dfi
.phases
[0].wrdata
[3*databits
+j
],
440 dfi
.phases
[1].wrdata
[0*databits
+j
],
441 dfi
.phases
[1].wrdata
[1*databits
+j
],
442 dfi
.phases
[1].wrdata
[2*databits
+j
],
443 dfi
.phases
[1].wrdata
[3*databits
+j
])
446 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
447 with m
.If(bl8_chunk
):
448 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
450 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
453 Instance("ODDRX2DQA",
454 i_RST
=ResetSignal("dramsync"),
455 i_ECLK
=ClockSignal("sync2x"),
456 i_SCLK
=ClockSignal(),
458 i_D0
=dq_o_data_muxed
[0],
459 i_D1
=dq_o_data_muxed
[1],
460 i_D2
=dq_o_data_muxed
[2],
461 i_D3
=dq_o_data_muxed
[3],
464 p_DEL_MODE
="DQS_ALIGNED_X2",
470 Instance("IDDRX2DQA",
471 i_RST
=ResetSignal("dramsync"),
472 i_ECLK
=ClockSignal("sync2x"),
473 i_SCLK
=ClockSignal(),
487 i_RST
=ResetSignal("dramsync"),
488 i_ECLK
=ClockSignal("sync2x"),
489 i_SCLK
=ClockSignal(),
498 io_B
=self
.pads
.dq
.io
[j
])
500 with m
.If(~datavalid_prev
& datavalid
):
502 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
503 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
504 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
505 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
507 with m
.Elif(datavalid
):
509 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
510 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
511 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
512 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
515 # Read Control Path ------------------------------------------------------------------------
516 # Creates a shift register of read commands coming from the DFI interface. This shift register
517 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
518 # DFI interface that the read data is valid.
520 # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
521 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
523 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
524 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
525 rddata_en
= Signal(self
.settings
.read_latency
)
526 rddata_en_last
= Signal
.like(rddata_en
)
527 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
528 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
529 m
.d
.comb
+= dqs_re
.eq(rddata_en
[cl_sys_latency
+ 1] | rddata_en
[cl_sys_latency
+ 2])
531 rddata_valid
= Signal()
532 m
.d
.sync
+= rddata_valid
.eq(datavalid_prev
& ~datavalid
)
533 for phase
in dfi
.phases
:
534 m
.d
.comb
+= phase
.rddata_valid
.eq(rddata_valid
)
536 # Write Control Path -----------------------------------------------------------------------
537 # Creates a shift register of write commands coming from the DFI interface. This shift register
538 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
539 # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
540 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
541 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
542 # FIXME: understand +2
543 wrdata_en
= Signal(cwl_sys_latency
+ 4)
544 wrdata_en_last
= Signal
.like(wrdata_en
)
545 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
546 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
547 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 1] | wrdata_en
[cwl_sys_latency
+ 2])
548 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
549 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
551 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
552 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
553 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
554 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
555 m
.d
.comb
+= dqs_preamble
.eq(wrdata_en
[cwl_sys_latency
+ 0] & ~wrdata_en
[cwl_sys_latency
+ 1])
556 m
.d
.comb
+= dqs_postamble
.eq(wrdata_en
[cwl_sys_latency
+ 3] & ~wrdata_en
[cwl_sys_latency
+ 2])