Add Tercel PHY reset synchronization
[microwatt.git] / soc.vhdl
index db52db3408335ada16134eccb4642f44d6b6d163..0295848a505c5baeaf817004c71e4ac749e91f22 100644 (file)
--- a/soc.vhdl
+++ b/soc.vhdl
@@ -10,43 +10,112 @@ use work.common.all;
 use work.wishbone_types.all;
 
 
--- Memory map:
+-- Memory map. *** Keep include/microwatt_soc.h updated on changes ***
 --
+-- Main bus:
 -- 0x00000000: Block RAM (MEMORY_SIZE) or DRAM depending on syscon
 -- 0x40000000: DRAM (when present)
+-- 0x80000000: Block RAM (aliased & repeated)
+
+-- IO Bus:
 -- 0xc0000000: SYSCON
 -- 0xc0002000: UART0
+-- 0xc0003000: UART1 (if any)
 -- 0xc0004000: XICS ICP
--- 0xf0000000: Block RAM (aliased & repeated)
--- 0xffff0000: DRAM init code (if any)
+-- 0xc0005000: XICS ICS
+-- 0xc0006000: SPI Flash controller
+-- 0xc8nnnnnn: External IO bus
+-- 0xf0000000: Flash "ROM" mapping
+-- 0xff000000: DRAM init code (if any) or flash ROM (**)
+
+-- External IO bus:
+-- 0xc8000000: LiteDRAM control (CSRs)
+-- 0xc8020000: LiteEth CSRs (*)
+-- 0xc8030000: LiteEth MMIO (*)
+
+-- (*) LiteEth must be a single aligned 32KB block as the CSRs and MMIOs
+--     are actually decoded as a single wishbone which LiteEth will
+--     internally split based on bit 16.
+
+-- (**) DRAM init code is currently special and goes to the external
+--      IO bus, this will be fixed when it's moved out of litedram and
+--      into the main SoC once we have a common "firmware".
+
+-- Interrupt numbers:
+--
+--   0  : UART0
+--   1  : Ethernet
 
 entity soc is
     generic (
-       MEMORY_SIZE   : positive;
-       RAM_INIT_FILE : string;
-       RESET_LOW     : boolean;
-       CLK_FREQ      : positive;
-       SIM           : boolean;
+       MEMORY_SIZE        : natural;
+       RAM_INIT_FILE      : string;
+       CLK_FREQ           : positive;
+       SIM                : boolean;
+        HAS_FPU            : boolean := true;
+        HAS_BTC            : boolean := true;
        DISABLE_FLATTEN_CORE : boolean := false;
-       HAS_DRAM      : boolean  := false;
-       DRAM_SIZE     : integer := 0
+       HAS_DRAM           : boolean  := false;
+       DRAM_SIZE          : integer := 0;
+        DRAM_INIT_SIZE     : integer := 0;
+        HAS_SPI_FLASH      : boolean := false;
+        SPI_FLASH_DLINES   : positive := 1;
+        SPI_FLASH_OFFSET   : integer := 0;
+        SPI_FLASH_DEF_CKDV : natural := 2;
+        SPI_FLASH_DEF_QUAD : boolean := false;
+        SPI_BOOT_CLOCKS    : boolean := true;
+        LOG_LENGTH         : natural := 512;
+        HAS_LITEETH        : boolean := false;
+        HAS_TERCEL         : boolean := false;
+        HAS_LPC_SLAVE      : boolean := false;
+       UART0_IS_16550     : boolean := true;
+       HAS_UART1          : boolean := false;
+        ICACHE_NUM_LINES   : natural := 64;
+        ICACHE_NUM_WAYS    : natural := 2;
+        ICACHE_TLB_SIZE    : natural := 64;
+        DCACHE_NUM_LINES   : natural := 64;
+        DCACHE_NUM_WAYS    : natural := 2;
+        DCACHE_TLB_SET_SIZE : natural := 64;
+        DCACHE_TLB_NUM_WAYS : natural := 2
        );
     port(
        rst          : in  std_ulogic;
        system_clk   : in  std_ulogic;
 
-       -- DRAM controller signals
-       wb_dram_in   : out wishbone_master_out;
-       wb_dram_out  : in wishbone_slave_out;
-       wb_dram_csr  : out std_ulogic;
-       wb_dram_init : out std_ulogic;
+       -- "Large" (64-bit) DRAM wishbone
+       wb_dram_in       : out wishbone_master_out;
+       wb_dram_out      : in wishbone_slave_out := wishbone_slave_out_init;
+
+        -- "Small" (32-bit) external IO wishbone
+       wb_ext_io_in         : out wb_io_master_out;
+       wb_ext_io_out        : in wb_io_slave_out := wb_io_slave_out_init;
+       wb_ext_is_dram_csr   : out std_ulogic;
+       wb_ext_is_dram_init  : out std_ulogic;
+       wb_ext_is_eth        : out std_ulogic;
+        wb_ext_is_tercel     : out std_ulogic;
+        wb_ext_is_lpc_slave  : out std_ulogic;
+
+        -- External interrupts
+        ext_irq_eth          : in std_ulogic := '0';
 
        -- UART0 signals:
        uart0_txd    : out std_ulogic;
-       uart0_rxd    : in  std_ulogic;
+       uart0_rxd    : in  std_ulogic := '0';
+
+       -- UART1 signals:
+       uart1_txd    : out std_ulogic;
+       uart1_rxd    : in  std_ulogic := '0';
+
+        -- SPI Flash controller signals:
+       wb_spiflash_in       : out wb_io_master_out;
+       wb_spiflash_out      : in wb_io_slave_out := wb_io_slave_out_init;
+
+       -- LPC slave DMA signals
+       wb_lpc_slave_in      : out wishbone_slave_out;
+       wb_lpc_slave_out     : in wishbone_master_out;
 
        -- DRAM controller signals
-       alt_reset    : in std_ulogic
+       alt_reset    : in std_ulogic := '0'
        );
 end entity soc;
 
@@ -59,10 +128,12 @@ architecture behaviour of soc is
     signal wishbone_icore_out : wishbone_master_out;
     signal wishbone_debug_in : wishbone_slave_out;
     signal wishbone_debug_out : wishbone_master_out;
+    signal wishbone_lpc_in    : wishbone_slave_out;
+    signal wishbone_lpc_out   : wishbone_master_out;
 
     -- Arbiter array (ghdl doesnt' support assigning the array
     -- elements in the entity instantiation)
-    constant NUM_WB_MASTERS : positive := 3;
+    constant NUM_WB_MASTERS : positive := 4;
     signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
     signal wb_masters_in  : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);
 
@@ -70,28 +141,44 @@ architecture behaviour of soc is
     signal wb_master_in       : wishbone_slave_out;
     signal wb_master_out      : wishbone_master_out;
 
+    -- Main "IO" bus, from main slave decoder to the latch
+    signal wb_io_in     : wishbone_master_out;
+    signal wb_io_out    : wishbone_slave_out;
+
+    -- Secondary (smaller) IO bus after the IO bus latch
+    signal wb_sio_out    : wb_io_master_out;
+    signal wb_sio_in     : wb_io_slave_out;
+
     -- Syscon signals
     signal dram_at_0     : std_ulogic;
-    signal core_reset    : std_ulogic;
-    signal wb_syscon_in  : wishbone_master_out;
-    signal wb_syscon_out : wishbone_slave_out;
+    signal do_core_reset    : std_ulogic;
+    signal wb_syscon_in  : wb_io_master_out;
+    signal wb_syscon_out : wb_io_slave_out;
 
     -- UART0 signals:
-    signal wb_uart0_in   : wishbone_master_out;
-    signal wb_uart0_out  : wishbone_slave_out;
-    signal uart_dat8     : std_ulogic_vector(7 downto 0);
-
-    -- XICS0 signals:
-    signal wb_xics0_in   : wishbone_master_out;
-    signal wb_xics0_out  : wishbone_slave_out;
-    signal int_level_in  : std_ulogic_vector(15 downto 0);
-
-    signal xics_to_execute1 : XicsToExecute1Type;
+    signal wb_uart0_in   : wb_io_master_out;
+    signal wb_uart0_out  : wb_io_slave_out;
+    signal uart0_dat8    : std_ulogic_vector(7 downto 0);
+    signal uart0_irq     : std_ulogic;
+
+    -- UART1 signals:
+    signal wb_uart1_in   : wb_io_master_out;
+    signal wb_uart1_out  : wb_io_slave_out;
+    signal uart1_dat8    : std_ulogic_vector(7 downto 0);
+    signal uart1_irq     : std_ulogic;
+
+    -- XICS signals:
+    signal wb_xics_icp_in   : wb_io_master_out;
+    signal wb_xics_icp_out  : wb_io_slave_out;
+    signal wb_xics_ics_in   : wb_io_master_out;
+    signal wb_xics_ics_out  : wb_io_slave_out;
+    signal int_level_in     : std_ulogic_vector(15 downto 0);
+    signal ics_to_icp       : ics_to_icp_t;
+    signal core_ext_irq     : std_ulogic;
 
     -- Main memory signals:
     signal wb_bram_in     : wishbone_master_out;
     signal wb_bram_out    : wishbone_slave_out;
-    constant mem_adr_bits : positive := positive(ceil(log2(real(MEMORY_SIZE))));
 
     -- DMI debug bus signals
     signal dmi_addr    : std_ulogic_vector(7 downto 0);
@@ -108,19 +195,91 @@ architecture behaviour of soc is
     signal dmi_core_dout  : std_ulogic_vector(63 downto 0);
     signal dmi_core_req   : std_ulogic;
     signal dmi_core_ack   : std_ulogic;
+
+    -- Delayed/latched resets and alt_reset
+    signal rst_core    : std_ulogic := '1';
+    signal rst_uart    : std_ulogic := '1';
+    signal rst_xics    : std_ulogic := '1';
+    signal rst_spi     : std_ulogic := '1';
+    signal rst_bram    : std_ulogic := '1';
+    signal rst_dtm     : std_ulogic := '1';
+    signal rst_wbar    : std_ulogic := '1';
+    signal rst_wbdb    : std_ulogic := '1';
+    signal alt_reset_d : std_ulogic;
+
+    -- IO branch split:
+    type slave_io_type is (SLAVE_IO_SYSCON,
+                           SLAVE_IO_UART,
+                           SLAVE_IO_ICP,
+                           SLAVE_IO_ICS,
+                           SLAVE_IO_UART1,
+                           SLAVE_IO_SPI_FLASH_MAP,
+                           SLAVE_IO_EXTERNAL,
+                           SLAVE_IO_NONE);
+    signal slave_io_dbg : slave_io_type;
+
+    -- This is the component exported by the 16550 compatible
+    -- UART from FuseSoC.
+    --
+    component uart_top port (
+        wb_clk_i    : in std_ulogic;
+        wb_rst_i    : in std_ulogic;
+        wb_adr_i    : in std_ulogic_vector(2 downto 0);
+        wb_dat_i    : in std_ulogic_vector(7 downto 0);
+        wb_dat_o    : out std_ulogic_vector(7 downto 0);
+        wb_we_i     : in std_ulogic;
+        wb_stb_i    : in std_ulogic;
+        wb_cyc_i    : in std_ulogic;
+        wb_ack_o    : out std_ulogic;
+        int_o       : out std_ulogic;
+        stx_pad_o   : out std_ulogic;
+        srx_pad_i   : in std_ulogic;
+        rts_pad_o   : out std_ulogic;
+        cts_pad_i   : in std_ulogic;
+        dtr_pad_o   : out std_ulogic;
+        dsr_pad_i   : in std_ulogic;
+        ri_pad_i    : in std_ulogic;
+        dcd_pad_i   : in std_ulogic
+        );
+    end component;
 begin
 
+    resets: process(system_clk)
+    begin
+        if rising_edge(system_clk) then
+            rst_core    <= rst or do_core_reset;
+            rst_uart    <= rst;
+            rst_spi     <= rst;
+            rst_xics    <= rst;
+            rst_bram    <= rst;
+            rst_dtm     <= rst;
+            rst_wbar    <= rst;
+            rst_wbdb    <= rst;
+            alt_reset_d <= alt_reset;
+        end if;
+    end process;
+
     -- Processor core
     processor: entity work.core
        generic map(
            SIM => SIM,
+            HAS_FPU => HAS_FPU,
+            HAS_BTC => HAS_BTC,
            DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
-           ALT_RESET_ADDRESS => (15 downto 0 => '0', others => '1')
+           ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
+            LOG_LENGTH => LOG_LENGTH,
+            ICACHE_NUM_LINES => ICACHE_NUM_LINES,
+            ICACHE_NUM_WAYS => ICACHE_NUM_WAYS,
+            ICACHE_TLB_SIZE => ICACHE_TLB_SIZE,
+            DCACHE_NUM_LINES => DCACHE_NUM_LINES,
+            DCACHE_NUM_WAYS => DCACHE_NUM_WAYS,
+            DCACHE_TLB_SET_SIZE => DCACHE_TLB_SET_SIZE,
+            DCACHE_TLB_NUM_WAYS => DCACHE_TLB_NUM_WAYS
            )
        port map(
            clk => system_clk,
-           rst => rst or core_reset,
-           alt_reset => alt_reset,
+           rst => rst_core,
+           alt_reset => alt_reset_d,
            wishbone_insn_in => wishbone_icore_in,
            wishbone_insn_out => wishbone_icore_out,
            wishbone_data_in => wishbone_dcore_in,
@@ -131,112 +290,353 @@ begin
            dmi_wr => dmi_wr,
            dmi_ack => dmi_core_ack,
            dmi_req => dmi_core_req,
-           xics_in => xics_to_execute1
+           ext_irq => core_ext_irq
            );
 
     -- Wishbone bus master arbiter & mux
     wb_masters_out <= (0 => wishbone_dcore_out,
                       1 => wishbone_icore_out,
-                      2 => wishbone_debug_out);
+                      2 => wishbone_debug_out,
+                      3 => wishbone_lpc_out);
     wishbone_dcore_in <= wb_masters_in(0);
     wishbone_icore_in <= wb_masters_in(1);
     wishbone_debug_in <= wb_masters_in(2);
+    wishbone_lpc_in   <= wb_masters_in(3);
     wishbone_arbiter_0: entity work.wishbone_arbiter
        generic map(
            NUM_MASTERS => NUM_WB_MASTERS
            )
        port map(
-           clk => system_clk, rst => rst,
+           clk => system_clk,
+            rst => rst_wbar,
            wb_masters_in => wb_masters_out,
            wb_masters_out => wb_masters_in,
            wb_slave_out => wb_master_out,
            wb_slave_in => wb_master_in
            );
 
-    -- Wishbone slaves address decoder & mux
-    slave_intercon: process(wb_master_out, wb_bram_out, wb_uart0_out, wb_dram_out, wb_syscon_out)
-       -- Selected slave
-       type slave_type is (SLAVE_SYSCON,
-                           SLAVE_UART,
-                           SLAVE_BRAM,
-                           SLAVE_DRAM,
-                           SLAVE_DRAM_INIT,
-                           SLAVE_DRAM_CSR,
-                           SLAVE_ICP_0,
-                           SLAVE_NONE);
-       variable slave : slave_type;
+    -- Top level Wishbone slaves address decoder & mux
+    --
+    -- From CPU to BRAM, DRAM, IO, selected on top 3 bits and dram_at_0
+    -- 0000  - BRAM
+    -- 0001  - DRAM
+    -- 01xx  - DRAM
+    -- 10xx  - BRAM
+    -- 11xx  - IO
+    --
+    slave_top_intercon: process(wb_master_out, wb_bram_out, wb_dram_out, wb_io_out, dram_at_0)
+       type slave_top_type is (SLAVE_TOP_BRAM,
+                                SLAVE_TOP_DRAM,
+                                SLAVE_TOP_IO);
+       variable slave_top : slave_top_type;
+        variable top_decode : std_ulogic_vector(3 downto 0);
     begin
-       -- Simple address decoder.
-       slave := SLAVE_NONE;
-       -- Simple address decoder. Ignore top bits to save silicon for now
-       slave := SLAVE_NONE;
-       if    std_match(wb_master_out.adr, x"0-------") then
-           slave := SLAVE_DRAM when HAS_DRAM and dram_at_0 = '1' else
-                    SLAVE_BRAM;
-       elsif std_match(wb_master_out.adr, x"FFFF----") then
-           slave := SLAVE_DRAM_INIT;
-       elsif std_match(wb_master_out.adr, x"F-------") then
-           slave := SLAVE_BRAM;
-       elsif std_match(wb_master_out.adr, x"4-------") and HAS_DRAM then
-           slave := SLAVE_DRAM;
-       elsif std_match(wb_master_out.adr, x"C0000---") then
-           slave := SLAVE_SYSCON;
-       elsif std_match(wb_master_out.adr, x"C0002---") then
-           slave := SLAVE_UART;
-       elsif std_match(wb_master_out.adr, x"C01-----") then
-           slave := SLAVE_DRAM_CSR;
-       elsif std_match(wb_master_out.adr, x"C0004---") then
-           slave := SLAVE_ICP_0;
+       -- Top-level address decoder
+        top_decode := wb_master_out.adr(31 downto 29) & dram_at_0;
+        slave_top := SLAVE_TOP_BRAM;
+       if    std_match(top_decode, "0000") then
+           slave_top := SLAVE_TOP_BRAM;
+       elsif std_match(top_decode, "0001") then
+           slave_top := SLAVE_TOP_DRAM;
+       elsif std_match(top_decode, "01--") then
+           slave_top := SLAVE_TOP_DRAM;
+       elsif std_match(top_decode, "10--") then
+           slave_top := SLAVE_TOP_BRAM;
+       elsif std_match(top_decode, "11--") then
+           slave_top := SLAVE_TOP_IO;
        end if;
 
-       -- Wishbone muxing. Defaults:
+       -- Top level wishbone muxing.
        wb_bram_in <= wb_master_out;
        wb_bram_in.cyc  <= '0';
-       wb_uart0_in <= wb_master_out;
-       wb_uart0_in.cyc <= '0';
-
-        -- Only give xics 8 bits of wb addr
-       wb_xics0_in <= wb_master_out;
-       wb_xics0_in.adr <= (others => '0');
-       wb_xics0_in.adr(7 downto 0) <= wb_master_out.adr(7 downto 0);
-       wb_xics0_in.cyc  <= '0';
-
        wb_dram_in <= wb_master_out;
-       wb_dram_in.cyc <= '0';
-       wb_dram_csr <= '0';
-       wb_dram_init <= '0';
-       wb_syscon_in <= wb_master_out;
-       wb_syscon_in.cyc <= '0';
-       case slave is
-       when SLAVE_BRAM =>
+       wb_dram_in.cyc  <= '0';
+       wb_io_in <= wb_master_out;
+       wb_io_in.cyc  <= '0';
+       case slave_top is
+       when SLAVE_TOP_BRAM =>
            wb_bram_in.cyc <= wb_master_out.cyc;
            wb_master_in <= wb_bram_out;
-       when SLAVE_DRAM =>
-           wb_dram_in.cyc <= wb_master_out.cyc;
-           wb_master_in <= wb_dram_out;
-       when SLAVE_DRAM_INIT =>
-           wb_dram_in.cyc <= wb_master_out.cyc;
-           wb_master_in <= wb_dram_out;
-           wb_dram_init <= '1';
-       when SLAVE_DRAM_CSR =>
-           wb_dram_in.cyc <= wb_master_out.cyc;
-           wb_master_in <= wb_dram_out;
-           wb_dram_csr <= '1';
-       when SLAVE_SYSCON =>
-           wb_syscon_in.cyc <= wb_master_out.cyc;
-           wb_master_in <= wb_syscon_out;
-       when SLAVE_UART =>
-           wb_uart0_in.cyc <= wb_master_out.cyc;
-           wb_master_in <= wb_uart0_out;
-       when SLAVE_ICP_0 =>
-           wb_xics0_in.cyc <= wb_master_out.cyc;
-           wb_master_in <= wb_xics0_out;
+       when SLAVE_TOP_DRAM =>
+            if HAS_DRAM then
+                wb_dram_in.cyc <= wb_master_out.cyc;
+                wb_master_in <= wb_dram_out;
+            else
+                wb_master_in.ack <= wb_master_out.cyc and wb_master_out.stb;
+                wb_master_in.dat <= (others => '1');
+                wb_master_in.stall <= '0';
+            end if;
+       when SLAVE_TOP_IO =>
+           wb_io_in.cyc <= wb_master_out.cyc;
+           wb_master_in <= wb_io_out;
+       end case;
+
+    end process slave_top_intercon;
+
+    -- IO wishbone slave 64->32 bits converter
+    --
+    -- For timing reasons, this adds a one cycle latch on the way both
+    -- in and out. This relaxes timing and routing pressure on the "main"
+    -- memory bus by moving all simple IOs to a slower 32-bit bus.
+    --
+    -- This implementation is rather dumb at the moment, no stash buffer,
+    -- so we stall whenever that latch is busy. This can be improved.
+    --
+    slave_io_latch: process(system_clk)
+        -- State
+        type state_t is (IDLE, WAIT_ACK_BOT, WAIT_ACK_TOP);
+        variable state : state_t;
+
+        -- Misc
+        variable has_top : boolean;
+        variable has_bot : boolean;
+    begin
+        if rising_edge(system_clk) then
+            if (rst) then
+                state := IDLE;
+                wb_io_out.ack <= '0';
+                wb_io_out.stall <= '0';
+                wb_sio_out.cyc <= '0';
+                wb_sio_out.stb <= '0';
+                has_top := false;
+                has_bot := false;
+            else
+                case state is
+                when IDLE =>
+                    -- Clear ACK in case it was set
+                    wb_io_out.ack <= '0';
+
+                    -- Do we have a cycle ?
+                    if wb_io_in.cyc = '1' and wb_io_in.stb = '1' then
+                        -- Stall master until we are done, we are't (yet) pipelining
+                        -- this, it's all slow IOs.
+                        wb_io_out.stall <= '1';
+
+                        -- Start cycle downstream
+                        wb_sio_out.cyc <= '1';
+                        wb_sio_out.stb <= '1';
+
+                        -- Copy write enable to IO out, copy address as well
+                        wb_sio_out.we <= wb_io_in.we;
+                        wb_sio_out.adr <= wb_io_in.adr(wb_sio_out.adr'left downto 3) & "000";
+
+                        -- Do we have a top word and/or a bottom word ?
+                        has_top := wb_io_in.sel(7 downto 4) /= "0000";
+                        has_bot := wb_io_in.sel(3 downto 0) /= "0000";
+
+                        -- If we have a bottom word, handle it first, otherwise
+                        -- send the top word down. XXX Split the actual mux out
+                        -- and only generate a control signal.
+                        if has_bot then
+                            if wb_io_in.we = '1' then
+                                wb_sio_out.dat <= wb_io_in.dat(31 downto 0);
+                            end if;
+                            wb_sio_out.sel <= wb_io_in.sel(3 downto 0);
+
+                            -- Wait for ack
+                            state := WAIT_ACK_BOT;
+                        else
+                            if wb_io_in.we = '1' then
+                                wb_sio_out.dat <= wb_io_in.dat(63 downto 32);
+                            end if;
+                            wb_sio_out.sel <= wb_io_in.sel(7 downto 4);
+
+                            -- Bump address
+                            wb_sio_out.adr(2) <= '1';
+
+                            -- Wait for ack
+                            state := WAIT_ACK_TOP;
+                        end if;
+                    end if;
+                when WAIT_ACK_BOT =>
+                    -- If we aren't stalled by the device, clear stb
+                    if wb_sio_in.stall = '0' then
+                        wb_sio_out.stb <= '0';
+                    end if;
+
+                    -- Handle ack
+                    if wb_sio_in.ack = '1' then
+                        -- If it's a read, latch the data
+                        if wb_sio_out.we = '0' then
+                            wb_io_out.dat(31 downto 0) <= wb_sio_in.dat;
+                        end if;
+
+                        -- Do we have a "top" part as well ?
+                        if has_top then
+                            -- Latch data & sel
+                            if wb_io_in.we = '1' then
+                                wb_sio_out.dat <= wb_io_in.dat(63 downto 32);
+                            end if;
+                            wb_sio_out.sel <= wb_io_in.sel(7 downto 4);
+
+                            -- Bump address and set STB
+                            wb_sio_out.adr(2) <= '1';
+                            wb_sio_out.stb <= '1';
+
+                            -- Wait for new ack
+                            state := WAIT_ACK_TOP;
+                        else
+                            -- We are done, ack up, clear cyc downstram
+                            wb_sio_out.cyc <= '0';
+
+                            -- And ack & unstall upstream
+                            wb_io_out.ack <= '1';
+                            wb_io_out.stall <= '0';
+
+                            -- Wait for next one
+                            state := IDLE;
+                        end if;
+                    end if;
+                when WAIT_ACK_TOP =>
+                    -- If we aren't stalled by the device, clear stb
+                    if wb_sio_in.stall = '0' then
+                        wb_sio_out.stb <= '0';
+                    end if;
+
+                    -- Handle ack
+                    if wb_sio_in.ack = '1' then
+                        -- If it's a read, latch the data
+                        if wb_sio_out.we = '0' then
+                            wb_io_out.dat(63 downto 32) <= wb_sio_in.dat;
+                        end if;
+
+                        -- We are done, ack up, clear cyc downstram
+                        wb_sio_out.cyc <= '0';
+
+                        -- And ack & unstall upstream
+                        wb_io_out.ack <= '1';
+                        wb_io_out.stall <= '0';
+
+                        -- Wait for next one
+                        state := IDLE;
+                    end if;
+                end case;
+            end if;
+        end if;
+    end process;
+            
+    -- IO wishbone slave intercon.
+    --
+    slave_io_intercon: process(wb_sio_out, wb_syscon_out, wb_uart0_out, wb_uart1_out,
+                               wb_ext_io_out, wb_xics_icp_out, wb_xics_ics_out,
+                               wb_spiflash_out)
+       variable slave_io : slave_io_type;
+
+        variable match : std_ulogic_vector(31 downto 12);
+        variable ext_valid : boolean;
+    begin
+
+       -- Simple address decoder.
+       slave_io := SLAVE_IO_NONE;
+        match := "11" & wb_sio_out.adr(29 downto 12);
+        if    std_match(match, x"FF---") and HAS_DRAM then
+           slave_io := SLAVE_IO_EXTERNAL;
+        elsif std_match(match, x"F----") then
+           slave_io := SLAVE_IO_SPI_FLASH_MAP;
+       elsif std_match(match, x"C0000") then
+           slave_io := SLAVE_IO_SYSCON;
+       elsif std_match(match, x"C0002") then
+           slave_io := SLAVE_IO_UART;
+       elsif std_match(match, x"C0003") then
+           slave_io := SLAVE_IO_UART1;
+       elsif std_match(match, x"C8---") then
+           slave_io := SLAVE_IO_EXTERNAL;
+       elsif std_match(match, x"C0004") then
+           slave_io := SLAVE_IO_ICP;
+       elsif std_match(match, x"C0005") then
+           slave_io := SLAVE_IO_ICS;
+       end if;
+        slave_io_dbg <= slave_io;
+       wb_uart0_in <= wb_sio_out;
+       wb_uart0_in.cyc <= '0';
+       wb_uart1_in <= wb_sio_out;
+       wb_uart1_in.cyc <= '0';
+       wb_spiflash_in <= wb_sio_out;
+       wb_spiflash_in.cyc <= '0';
+
+        -- Only give xics 8 bits of wb addr (for now...)
+       wb_xics_icp_in <= wb_sio_out;
+       wb_xics_icp_in.adr <= (others => '0');
+       wb_xics_icp_in.adr(7 downto 0) <= wb_sio_out.adr(7 downto 0);
+       wb_xics_icp_in.cyc  <= '0';
+       wb_xics_ics_in <= wb_sio_out;
+       wb_xics_ics_in.adr <= (others => '0');
+       wb_xics_ics_in.adr(11 downto 0) <= wb_sio_out.adr(11 downto 0);
+       wb_xics_ics_in.cyc  <= '0';
+
+       wb_ext_io_in <= wb_sio_out;
+       wb_ext_io_in.cyc <= '0';
+
+       wb_syscon_in <= wb_sio_out;
+       wb_syscon_in.cyc <= '0';
+
+       wb_ext_is_dram_csr   <= '0';
+       wb_ext_is_dram_init  <= '0';
+       wb_ext_is_eth        <= '0';
+
+        -- Default response, ack & return all 1's
+        wb_sio_in.dat <= (others => '1');
+        wb_sio_in.ack <= wb_sio_out.stb and wb_sio_out.cyc;
+        wb_sio_in.stall <= '0';
+
+       case slave_io is
+       when SLAVE_IO_EXTERNAL =>
+            -- Ext IO "chip selects"
+            --
+            -- DRAM init is special at 0xFF* so we just test the top
+            -- bit. Everything else is at 0xC8* so we test only bits
+            -- 23 downto 16.
+            --
+            ext_valid := false;
+            if wb_sio_out.adr(29) = '1' and HAS_DRAM then  -- DRAM init is special
+                wb_ext_is_dram_init <= '1';
+                ext_valid := true;
+            elsif wb_sio_out.adr(23 downto 16) = x"00" and HAS_DRAM then
+                wb_ext_is_dram_csr  <= '1';
+                ext_valid := true;
+            elsif wb_sio_out.adr(23 downto 16) = x"02" and HAS_LITEETH then
+                wb_ext_is_eth       <= '1';
+                ext_valid := true;
+            elsif wb_sio_out.adr(23 downto 16) = x"03" and HAS_LITEETH then
+                wb_ext_is_eth       <= '1';
+                ext_valid := true;
+            elsif wb_sio_out.adr(21 downto 14) = x"04" and HAS_LPC_SLAVE then
+                wb_ext_is_lpc_slave <= '1';
+                ext_valid := true;
+            elsif wb_sio_out.adr(21 downto 14) = x"05" and HAS_TERCEL then
+                wb_ext_is_tercel <= '1';
+                ext_valid := true;
+            end if;
+            if ext_valid then
+                wb_ext_io_in.cyc <= wb_sio_out.cyc;
+                wb_sio_in <= wb_ext_io_out;
+            end if;
+
+       when SLAVE_IO_SYSCON =>
+           wb_syscon_in.cyc <= wb_sio_out.cyc;
+           wb_sio_in <= wb_syscon_out;
+       when SLAVE_IO_UART =>
+           wb_uart0_in.cyc <= wb_sio_out.cyc;
+           wb_sio_in <= wb_uart0_out;
+       when SLAVE_IO_ICP =>
+           wb_xics_icp_in.cyc <= wb_sio_out.cyc;
+           wb_sio_in <= wb_xics_icp_out;
+       when SLAVE_IO_ICS =>
+           wb_xics_ics_in.cyc <= wb_sio_out.cyc;
+           wb_sio_in <= wb_xics_ics_out;
+       when SLAVE_IO_UART1 =>
+           wb_uart1_in.cyc <= wb_sio_out.cyc;
+           wb_sio_in <= wb_uart1_out;
+       when SLAVE_IO_SPI_FLASH_MAP =>
+            -- Clear top bits so they don't make their way to the
+            -- fash chip.
+            wb_spiflash_in.adr(29 downto 28) <= "00";
+           wb_spiflash_in.cyc <= wb_sio_out.cyc;
+           wb_sio_in <= wb_spiflash_out;
        when others =>
-           wb_master_in.dat <= (others => '1');
-           wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc;
-           wb_master_in.stall <= '0';
        end case;
-    end process slave_intercon;
+
+    end process;
 
     -- Syscon slave
     syscon0: entity work.syscon
@@ -245,7 +645,13 @@ begin
            HAS_DRAM => HAS_DRAM,
            BRAM_SIZE => MEMORY_SIZE,
            DRAM_SIZE => DRAM_SIZE,
-           CLK_FREQ => CLK_FREQ
+           DRAM_INIT_SIZE => DRAM_INIT_SIZE,
+           CLK_FREQ => CLK_FREQ,
+           HAS_SPI_FLASH => HAS_SPI_FLASH,
+           SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
+            HAS_LITEETH => HAS_LITEETH,
+            UART0_IS_16550 => UART0_IS_16550,
+            HAS_UART1 => HAS_UART1
        )
        port map(
            clk => system_clk,
@@ -253,63 +659,173 @@ begin
            wishbone_in => wb_syscon_in,
            wishbone_out => wb_syscon_out,
            dram_at_0 => dram_at_0,
-           core_reset => core_reset,
+           core_reset => do_core_reset,
            soc_reset => open -- XXX TODO
            );
 
-    -- Simulated memory and UART
-
-    -- UART0 wishbone slave
-    -- XXX FIXME: Need a proper wb64->wb8 adapter that
-    --            converts SELs into low address bits and muxes
-    --            data accordingly (either that or rejects large
-    --            cycles).
-    uart0: entity work.pp_soc_uart
-       generic map(
-           FIFO_DEPTH => 32
-           )
+    --
+    -- UART0
+    --
+    -- Either potato (legacy) or 16550
+    --
+    uart0_pp: if not UART0_IS_16550 generate
+       uart0: entity work.pp_soc_uart
+           generic map(
+               FIFO_DEPTH => 32
+               )
+           port map(
+               clk => system_clk,
+               reset => rst_uart,
+               txd => uart0_txd,
+               rxd => uart0_rxd,
+               irq => uart0_irq,
+               wb_adr_in => wb_uart0_in.adr(11 downto 0),
+               wb_dat_in => wb_uart0_in.dat(7 downto 0),
+               wb_dat_out => uart0_dat8,
+               wb_cyc_in => wb_uart0_in.cyc,
+               wb_stb_in => wb_uart0_in.stb,
+               wb_we_in => wb_uart0_in.we,
+               wb_ack_out => wb_uart0_out.ack
+               );
+    end generate;
+
+    uart0_16550 : if UART0_IS_16550 generate
+        signal irq_l : std_ulogic;
+    begin
+       uart0: uart_top
+           port map (
+               wb_clk_i   => system_clk,
+               wb_rst_i   => rst_uart,
+               wb_adr_i   => wb_uart0_in.adr(4 downto 2),
+               wb_dat_i   => wb_uart0_in.dat(7 downto 0),
+               wb_dat_o   => uart0_dat8,
+               wb_we_i    => wb_uart0_in.we,
+               wb_stb_i   => wb_uart0_in.stb,
+               wb_cyc_i   => wb_uart0_in.cyc,
+               wb_ack_o   => wb_uart0_out.ack,
+               int_o      => irq_l,
+               stx_pad_o  => uart0_txd,
+               srx_pad_i  => uart0_rxd,
+               rts_pad_o  => open,
+               cts_pad_i  => '1',
+               dtr_pad_o  => open,
+               dsr_pad_i  => '1',
+               ri_pad_i   => '0',
+               dcd_pad_i  => '1'
+               );
+
+        -- Add a register on the irq out, helps timing
+        uart0_irq_latch: process(system_clk)
+        begin
+            if rising_edge(system_clk) then
+                uart0_irq <= irq_l;
+            end if;
+        end process;
+    end generate;
+
+    wb_uart0_out.dat <= x"000000" & uart0_dat8;
+    wb_uart0_out.stall <= not wb_uart0_out.ack;
+
+    --
+    -- UART1
+    --
+    -- Always 16550 if it exists
+    --
+    uart1: if HAS_UART1 generate
+        signal irq_l : std_ulogic;
+    begin
+       uart1: uart_top
+           port map (
+               wb_clk_i   => system_clk,
+               wb_rst_i   => rst_uart,
+               wb_adr_i   => wb_uart1_in.adr(4 downto 2),
+               wb_dat_i   => wb_uart1_in.dat(7 downto 0),
+               wb_dat_o   => uart1_dat8,
+               wb_we_i    => wb_uart1_in.we,
+               wb_stb_i   => wb_uart1_in.stb,
+               wb_cyc_i   => wb_uart1_in.cyc,
+               wb_ack_o   => wb_uart1_out.ack,
+               int_o      => irq_l,
+               stx_pad_o  => uart1_txd,
+               srx_pad_i  => uart1_rxd,
+               rts_pad_o  => open,
+               cts_pad_i  => '1',
+               dtr_pad_o  => open,
+               dsr_pad_i  => '1',
+               ri_pad_i   => '0',
+               dcd_pad_i  => '1'
+               );
+        -- Add a register on the irq out, helps timing
+        uart0_irq_latch: process(system_clk)
+        begin
+            if rising_edge(system_clk) then
+                uart1_irq <= irq_l;
+            end if;
+        end process;
+       wb_uart1_out.dat <= x"000000" & uart1_dat8;
+       wb_uart1_out.stall <= not wb_uart1_out.ack;
+    end generate;
+
+    no_uart1 : if not HAS_UART1 generate
+       wb_uart1_out.dat <= x"00000000";
+       wb_uart1_out.ack <= wb_uart1_in.cyc and wb_uart1_in.stb;
+       wb_uart1_out.stall <= '0';
+        uart1_irq <= '0';
+    end generate;
+
+    xics_icp: entity work.xics_icp
        port map(
            clk => system_clk,
-           reset => rst,
-           txd => uart0_txd,
-           rxd => uart0_rxd,
-           irq => int_level_in(0),
-           wb_adr_in => wb_uart0_in.adr(11 downto 0),
-           wb_dat_in => wb_uart0_in.dat(7 downto 0),
-           wb_dat_out => uart_dat8,
-           wb_cyc_in => wb_uart0_in.cyc,
-           wb_stb_in => wb_uart0_in.stb,
-           wb_we_in => wb_uart0_in.we,
-           wb_ack_out => wb_uart0_out.ack
+           rst => rst_xics,
+           wb_in => wb_xics_icp_in,
+           wb_out => wb_xics_icp_out,
+            ics_in => ics_to_icp,
+           core_irq_out => core_ext_irq
            );
-    wb_uart0_out.dat <= x"00000000000000" & uart_dat8;
-    wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack;
 
-    xics0: entity work.xics
+    xics_ics: entity work.xics_ics
        generic map(
-           LEVEL_NUM => 16
+           SRC_NUM   => 16,
+           PRIO_BITS => 3
            )
        port map(
            clk => system_clk,
-           rst => rst,
-           wb_in => wb_xics0_in,
-           wb_out => wb_xics0_out,
+           rst => rst_xics,
+           wb_in => wb_xics_ics_in,
+           wb_out => wb_xics_ics_out,
            int_level_in => int_level_in,
-           e_out => xics_to_execute1
+            icp_out => ics_to_icp
            );
 
+    -- Assign external interrupts
+    interrupts: process(all)
+    begin
+        int_level_in <= (others => '0');
+        int_level_in(0) <= uart0_irq;
+        int_level_in(1) <= ext_irq_eth;
+        int_level_in(2) <= uart1_irq;
+    end process;
+
     -- BRAM Memory slave
-    bram0: entity work.wishbone_bram_wrapper
-       generic map(
-           MEMORY_SIZE   => MEMORY_SIZE,
-           RAM_INIT_FILE => RAM_INIT_FILE
-           )
-       port map(
-           clk => system_clk,
-           rst => rst,
-           wishbone_in => wb_bram_in,
-           wishbone_out => wb_bram_out
-           );
+    bram: if MEMORY_SIZE /= 0 generate
+        bram0: entity work.wishbone_bram_wrapper
+            generic map(
+                MEMORY_SIZE   => MEMORY_SIZE,
+                RAM_INIT_FILE => RAM_INIT_FILE
+                )
+            port map(
+                clk => system_clk,
+                rst => rst_bram,
+                wishbone_in => wb_bram_in,
+                wishbone_out => wb_bram_out
+                );
+    end generate;
+
+    no_bram: if MEMORY_SIZE = 0 generate
+        wb_bram_out.ack <= wb_bram_in.cyc and wb_bram_in.stb;
+        wb_bram_out.dat <= x"FFFFFFFFFFFFFFFF";
+        wb_bram_out.stall <= not wb_bram_out.ack;
+    end generate;
 
     -- DMI(debug bus) <-> JTAG bridge
     dtm: entity work.dmi_dtm
@@ -319,7 +835,7 @@ begin
            )
        port map(
            sys_clk     => system_clk,
-           sys_reset   => rst,
+           sys_reset   => rst_dtm,
            dmi_addr    => dmi_addr,
            dmi_din     => dmi_din,
            dmi_dout    => dmi_dout,
@@ -377,7 +893,8 @@ begin
 
     -- Wishbone debug master (TODO: Add a DMI address decoder)
     wishbone_debug: entity work.wishbone_debug_master
-       port map(clk => system_clk, rst => rst,
+       port map(clk => system_clk,
+                 rst => rst_wbdb,
                 dmi_addr => dmi_addr(1 downto 0),
                 dmi_dout => dmi_wb_dout,
                 dmi_din => dmi_dout,
@@ -387,5 +904,51 @@ begin
                 wb_in => wishbone_debug_in,
                 wb_out => wishbone_debug_out);
 
+--pragma synthesis_off
+    wb_x_state: process(system_clk)
+    begin
+        if rising_edge(system_clk) then
+            if not rst then
+                -- Wishbone arbiter
+                assert not(is_x(wb_masters_out(0).cyc)) and not(is_x(wb_masters_out(0).stb)) severity failure;
+                assert not(is_x(wb_masters_out(1).cyc)) and not(is_x(wb_masters_out(1).stb)) severity failure;
+                assert not(is_x(wb_masters_out(2).cyc)) and not(is_x(wb_masters_out(2).stb)) severity failure;
+                assert not(is_x(wb_masters_out(3).cyc)) and not(is_x(wb_masters_out(3).stb)) severity failure;
+                assert not(is_x(wb_masters_in(0).ack)) severity failure;
+                assert not(is_x(wb_masters_in(1).ack)) severity failure;
+                assert not(is_x(wb_masters_in(2).ack)) severity failure;
+                assert not(is_x(wb_masters_in(3).ack)) severity failure;
+
+                -- Main memory wishbones
+                assert not(is_x(wb_bram_in.cyc)) and not (is_x(wb_bram_in.stb)) severity failure;
+                assert not(is_x(wb_dram_in.cyc)) and not (is_x(wb_dram_in.stb)) severity failure;
+                assert not(is_x(wb_io_in.cyc)) and not (is_x(wb_io_in.stb)) severity failure;
+                assert not(is_x(wb_bram_out.ack)) severity failure;
+                assert not(is_x(wb_dram_out.ack)) severity failure;
+                assert not(is_x(wb_io_out.ack)) severity failure;
+
+                -- I/O wishbones
+                assert not(is_x(wb_uart0_in.cyc)) and not(is_x(wb_uart0_in.stb)) severity failure;
+                assert not(is_x(wb_uart1_in.cyc)) and not(is_x(wb_uart1_in.stb)) severity failure;
+                assert not(is_x(wb_spiflash_in.cyc)) and not(is_x(wb_spiflash_in.stb)) severity failure;
+                assert not(is_x(wb_xics_icp_in.cyc)) and not(is_x(wb_xics_icp_in.stb)) severity failure;
+                assert not(is_x(wb_xics_ics_in.cyc)) and not(is_x(wb_xics_ics_in.stb)) severity failure;
+                assert not(is_x(wb_ext_io_in.cyc)) and not(is_x(wb_ext_io_in.stb)) severity failure;
+                assert not(is_x(wb_syscon_in.cyc)) and not(is_x(wb_syscon_in.stb)) severity failure;
+                assert not(is_x(wb_uart0_out.ack)) severity failure;
+                assert not(is_x(wb_uart1_out.ack)) severity failure;
+                assert not(is_x(wb_spiflash_out.ack)) severity failure;
+                assert not(is_x(wb_xics_icp_out.ack)) severity failure;
+                assert not(is_x(wb_xics_ics_out.ack)) severity failure;
+                assert not(is_x(wb_ext_io_out.ack)) severity failure;
+                assert not(is_x(wb_syscon_out.ack)) severity failure;
+            end if;
+        end if;
+    end process;
+--pragma synthesis_on
+
+    -- LPC slave DMA
+    wb_lpc_slave_in <= wishbone_lpc_in;
+    wishbone_lpc_out <= wb_lpc_slave_out;
 
 end architecture behaviour;