minor reorg, add alu
[rv32.git] / cpu.py
diff --git a/cpu.py b/cpu.py
index 9fa8596f008c55e0e0d1a7889191fb4f0af3c3c4..4e769f9b600610704959e9ad8e69cc5b1083bf44 100644 (file)
--- a/cpu.py
+++ b/cpu.py
 
 from migen import *
 from migen.fhdl import verilog
+from migen.fhdl.structure import _Operator
 
 from riscvdefs import *
 from cpudefs import *
 
+class MemoryInterface:
+    """Bundle of the signals that connect the CPU to the memory instance.
+
+    There are two groups: an instruction-fetch port (``fetch_*``) and a
+    data read/write port (``rw_*``).  Every signal carries an explicit
+    ``memory_interface_`` name so the generated Verilog keeps the same
+    wire names as the hand-written original.
+    """
+
+    # --- instruction fetch port ---
+    # XXX [2:] -- the Verilog original declared this as [31:2]
+    fetch_address = Signal(32, name="memory_interface_fetch_address")
+    fetch_data = Signal(32, name="memory_interface_fetch_data")
+    fetch_valid = Signal(name="memory_interface_fetch_valid")
+
+    # --- data read/write port ---
+    # XXX [2:] -- the Verilog original declared this as [31:2]
+    rw_address = Signal(32, name="memory_interface_rw_address")
+    # one enable bit per byte lane of the 32-bit data word
+    rw_byte_mask = Signal(4, name="memory_interface_rw_byte_mask")
+    rw_read_not_write = Signal(name="memory_interface_rw_read_not_write")
+    rw_active = Signal(name="memory_interface_rw_active")
+    rw_data_in = Signal(32, name="memory_interface_rw_data_in")
+    rw_data_out = Signal(32, name="memory_interface_rw_data_out")
+    rw_address_valid = Signal(name="memory_interface_rw_address_valid")
+    rw_wait = Signal(name="memory_interface_rw_wait")
+
+
+class Decoder:
+    """Bundle of the instruction fields produced by the decoder instance.
+
+    Each signal carries an explicit ``decoder_`` name so the generated
+    Verilog keeps predictable wire names.
+    """
+
+    # --- raw instruction fields ---
+    opcode = Signal(7, name="decoder_opcode")
+    funct3 = Signal(3, name="decoder_funct3")
+    funct7 = Signal(7, name="decoder_funct7")
+    rd = Signal(5, name="decoder_rd")
+    rs1 = Signal(5, name="decoder_rs1")
+    rs2 = Signal(5, name="decoder_rs2")
+    immediate = Signal(32, name="decoder_immediate")
+
+    # --- decoded action; width comes from cpudefs' decode_action ---
+    act = Signal(decode_action, name="decoder_action")
+
+
 class CPU(Module):
-    """ 
+    """
     """
 
-    def __init__(self):
-        self.instruction = Signal(32)
-        self.funct7 = Signal(7)
-        self.funct3 = Signal(3)
-        self.rd = Signal(5)
-        self.rs1 = Signal(5)
-        self.rs2 = Signal(5)
-        self.immediate = Signal(32)
-        self.opcode = Signal(7)
-        self.decode_action = Signal(decode_action)
+    def get_ls_misaligned(self, ls, funct3, load_store_address_low_2):
+        """Return a Case statement that drives the misaligned flag ``ls``.
+
+        The access size is taken from the low two bits of ``funct3``:
+        byte accesses are never misaligned, halfword accesses are
+        misaligned when address bit 0 is set, word accesses when either
+        of the low two address bits is set.  Any other size encoding is
+        reported as misaligned.
+        """
+        odd_address = load_store_address_low_2[0] != 0
+        not_word_aligned = load_store_address_low_2[0:2] != Constant(0, 2)
+
+        size_cases = {
+            F3.sb: ls.eq(Constant(0)),
+            F3.sh: ls.eq(odd_address),
+            F3.sw: ls.eq(not_word_aligned),
+            "default": ls.eq(Constant(1)),
+        }
+        return Case(funct3[:2], size_cases)
+
+    def get_lsbm(self, dc):
+        """Return the unshifted 4-bit load/store byte mask for ``dc.funct3``.
+
+        Bit 0 is always set, bit 1 is set when funct3[1] or funct3[0] is
+        set, and bits 3:2 are both set when funct3[1] is set.  The caller
+        shifts the result left by the low address bits.
+        """
+        lane0 = Constant(1)
+        lane1 = Mux((dc.funct3[1] | dc.funct3[0]), Constant(1), Constant(0))
+        lanes23 = Mux((dc.funct3[1]), Constant(0b11, 2), Constant(0, 2))
+        # Cat() places its first argument in the least significant bits
+        return Cat(lane0, lane1, lanes23)
 
+    def __init__(self):
+        """Build the CPU top level.
+
+        Instantiates the external memory interface, fetch stage and
+        decoder, then adds the combinatorial logic for register reads,
+        load/store address, byte mask and data alignment, load
+        sign/zero extension and ALU operand selection.
+
+        The board-level pins (``tty_write``, ``tty_write_data``,
+        ``tty_write_busy``, ``switch_2``, ``switch_3``, ``led_1``,
+        ``led_3``) are exposed as attributes, together with ``clk`` and
+        ``reset``.
+        """
+        self.clk = ClockSignal()
+        self.reset = ResetSignal()
+        self.tty_write = Signal()
+        self.tty_write_data = Signal(8)
+        self.tty_write_busy = Signal()
+        self.switch_2 = Signal()
+        self.switch_3 = Signal()
+        self.led_1 = Signal()
+        self.led_3 = Signal()
+
+        ram_size = Constant(0x8000)
+        ram_start = Constant(0x10000, 32)
+        reset_vector = Signal(32)
+        mtvec = Signal(32)
+
+        # These assignments must be added to self.comb: a bare
+        # ``signal.eq(...)`` expression statement is discarded and would
+        # leave reset_vector / mtvec undriven.
+        self.comb += [
+            reset_vector.eq(ram_start),
+            mtvec.eq(ram_start + 0x40),
+        ]
+
+        # Only 31 registers are stored; register number 0 always reads as
+        # zero (see the register_rs1 / register_rs2 logic below), so
+        # register number N lives at index N-1.
+        registers = Array([Signal(32, name="register%d" % i)
+                           for i in range(31)])
+
+        mi = MemoryInterface()
+
+        mii = Instance("cpu_memory_interface", name="memory_instance",
+                    p_ram_size = ram_size,
+                    p_ram_start = ram_start,
+                    i_clk=ClockSignal(),
+                    i_rst=ResetSignal(),
+                    i_fetch_address = mi.fetch_address,
+                    o_fetch_data = mi.fetch_data,
+                    o_fetch_valid = mi.fetch_valid,
+                    i_rw_address = mi.rw_address,
+                    i_rw_byte_mask = mi.rw_byte_mask,
+                    i_rw_read_not_write = mi.rw_read_not_write,
+                    i_rw_active = mi.rw_active,
+                    i_rw_data_in = mi.rw_data_in,
+                    o_rw_data_out = mi.rw_data_out,
+                    o_rw_address_valid = mi.rw_address_valid,
+                    o_rw_wait = mi.rw_wait,
+                    o_tty_write = self.tty_write,
+                    o_tty_write_data = self.tty_write_data,
+                    i_tty_write_busy = self.tty_write_busy,
+                    i_switch_2 = self.switch_2,
+                    i_switch_3 = self.switch_3,
+                    o_led_1 = self.led_1,
+                    o_led_3 = self.led_3
+                  )
+        self.specials += mii
+
+        fetch_act = Signal(fetch_action)
+        fetch_target_pc = Signal(32)
+        fetch_output_pc = Signal(32)
+        fetch_output_instruction = Signal(32)
+        fetch_output_st = Signal(fetch_output_state)
+
+        fs = Instance("CPUFetchStage", name="fetch_stage",
+            i_clk=ClockSignal(),
+            i_rst=ResetSignal(),
+            o_memory_interface_fetch_address = mi.fetch_address,
+            i_memory_interface_fetch_data = mi.fetch_data,
+            i_memory_interface_fetch_valid = mi.fetch_valid,
+            i_fetch_action = fetch_act,
+            i_target_pc = fetch_target_pc,
+            o_output_pc = fetch_output_pc,
+            o_output_instruction = fetch_output_instruction,
+            o_output_state = fetch_output_st,
+            i_reset_vector = reset_vector,
+            i_mtvec = mtvec,
+        )
+        self.specials += fs
+
+        dc = Decoder()
+
+        cd = Instance("CPUDecoder", name="decoder",
+            i_instruction = fetch_output_instruction,
+            o_funct7 = dc.funct7,
+            o_funct3 = dc.funct3,
+            o_rd = dc.rd,
+            o_rs1 = dc.rs1,
+            o_rs2 = dc.rs2,
+            o_immediate = dc.immediate,
+            o_opcode = dc.opcode,
+            o_decode_action = dc.act
+        )
+        self.specials += cd
+
+        # register read ports: register number 0 reads as constant zero
+        register_rs1 = Signal(32)
+        register_rs2 = Signal(32)
+        self.comb += If(dc.rs1 == 0,
+                        register_rs1.eq(0)
+                     ).Else(
+                        register_rs1.eq(registers[dc.rs1-1]))
+        self.comb += If(dc.rs2 == 0,
+                        register_rs2.eq(0)
+                     ).Else(
+                        register_rs2.eq(registers[dc.rs2-1]))
+
+        load_store_address = Signal(32)
+        load_store_address_low_2 = Signal(2)
+
+        self.comb += load_store_address.eq(dc.immediate + register_rs1)
+        # the low two address bits are computed with their own 2-bit adder
+        self.comb += load_store_address_low_2.eq(
+                            dc.immediate[:2] + register_rs1[:2])
+
+        load_store_misaligned = Signal()
+
+        lsa = self.get_ls_misaligned(load_store_misaligned, dc.funct3,
+                                     load_store_address_low_2)
+        self.comb += lsa
+
+        # XXX rwaddr not 31:2 any more
+        self.comb += mi.rw_address.eq(load_store_address[2:])
+
+        unshifted_load_store_byte_mask = Signal(4)
+
+        self.comb += unshifted_load_store_byte_mask.eq(self.get_lsbm(dc))
+
+        # XXX yuck.  this will cause migen simulation to fail
+        # (however conversion to verilog works)
+        self.comb += mi.rw_byte_mask.eq(
+                _Operator("<<", [unshifted_load_store_byte_mask,
+                                        load_store_address_low_2]))
+
+        # Store data alignment: each byte lane of the write data selects
+        # the rs2 byte that belongs there for the current address offset
+        # (equivalent to shifting rs2 left by whole bytes for the lanes
+        # enabled in the byte mask).
+        store_b3 = Mux(load_store_address_low_2[1],
+                       Mux(load_store_address_low_2[0], register_rs2[0:8],
+                                                        register_rs2[8:16]),
+                       Mux(load_store_address_low_2[0], register_rs2[16:24],
+                                                        register_rs2[24:32]))
+        store_b2 = Mux(load_store_address_low_2[1], register_rs2[0:8],
+                                                    register_rs2[16:24])
+        store_b1 = Mux(load_store_address_low_2[0], register_rs2[0:8],
+                                                    register_rs2[8:16])
+        store_b0 = register_rs2[0:8]
+
+        self.comb += mi.rw_data_in.eq(Cat(store_b0, store_b1,
+                                          store_b2, store_b3))
+
+        # Load data alignment: move the addressed byte (and the upper
+        # byte of an addressed halfword) down to the low lanes.
+        unmasked_loaded_value = Signal(32)
+
+        # NB: migen slices are [low:high) -- Verilog [15:8] is [8:16] and
+        # Verilog [31:24] is [24:32].
+        load_b0 = Mux(load_store_address_low_2[1],
+                      Mux(load_store_address_low_2[0], mi.rw_data_out[24:32],
+                                                       mi.rw_data_out[16:24]),
+                      Mux(load_store_address_low_2[0], mi.rw_data_out[8:16],
+                                                       mi.rw_data_out[0:8]))
+        load_b1 = Mux(load_store_address_low_2[1], mi.rw_data_out[24:32],
+                                                   mi.rw_data_out[8:16])
+        load_b23 = mi.rw_data_out[16:32]
+
+        self.comb += unmasked_loaded_value.eq(Cat(load_b0, load_b1, load_b23))
+
+        # Load extension: funct3[1:0] selects the access size and
+        # funct3[2] forces the extension bits to zero; otherwise the top
+        # bit of the loaded byte/halfword is replicated upwards.
+        loaded_value = Signal(32)
+
+        ext_b0 = unmasked_loaded_value[0:8]
+        ext_b1 = Mux(dc.funct3[0:2] == 0,
+                Replicate(~dc.funct3[2] & unmasked_loaded_value[7], 8),
+                unmasked_loaded_value[8:16])
+        ext_b23 = Mux(dc.funct3[1] == 0,
+                Replicate(~dc.funct3[2] &
+                           Mux(dc.funct3[0], unmasked_loaded_value[15],
+                                                  unmasked_loaded_value[7]),
+                          16),
+                unmasked_loaded_value[16:32])
+
+        self.comb += loaded_value.eq(Cat(ext_b0, ext_b1, ext_b23))
+
+        # a data access is only issued for a valid, aligned load or store
+        self.comb += mi.rw_active.eq(~self.reset
+                        & (fetch_output_st == fetch_output_state_valid)
+                        & ~load_store_misaligned
+                        & ((dc.act & (DA.load | DA.store)) != 0))
+
+        # opcode bit 5 is used to tell stores from loads
+        self.comb += mi.rw_read_not_write.eq(~dc.opcode[5])
+
+        # alu
+        alu_a = Signal(32)
+        alu_b = Signal(32)
+        alu_result = Signal(32)  # not driven yet; kept for the upcoming ALU
+
+        self.comb += alu_a.eq(register_rs1)
+        # opcode bit 5 selects register (rs2) versus immediate operand
+        self.comb += alu_b.eq(Mux(dc.opcode[5],
+                                  register_rs2,
+                                  dc.immediate))
 
 if __name__ == "__main__":
-    example = CPUDecoder()
+    example = CPU()
     print(verilog.convert(example,
          {
-           example.clk,
-           example.reset,
            example.tty_write,
            example.tty_write_data,
            example.tty_write_busy,
@@ -64,169 +278,6 @@ if __name__ == "__main__":
            }))
 
 """
-module cpu(
-    input clk,
-    input reset,
-    output tty_write,
-    output [7:0] tty_write_data,
-    input tty_write_busy,
-    input switch_2,
-    input switch_3,
-    output led_1,
-    output led_3
-    );
-
-    parameter ram_size = 'h8000;
-    parameter ram_start = 32'h1_0000;
-    parameter reset_vector = ram_start;
-    parameter mtvec = ram_start + 'h40;
-
-    reg [31:0] registers[31:1];
-
-    wire [31:2] memory_interface_fetch_address;
-    wire [31:0] memory_interface_fetch_data;
-    wire memory_interface_fetch_valid;
-    wire [31:2] memory_interface_rw_address;
-    wire [3:0] memory_interface_rw_byte_mask;
-    wire memory_interface_rw_read_not_write;
-    wire memory_interface_rw_active;
-    wire [31:0] memory_interface_rw_data_in;
-    wire [31:0] memory_interface_rw_data_out;
-    wire memory_interface_rw_address_valid;
-    wire memory_interface_rw_wait;
-
-    cpu_memory_interface #(
-        .ram_size(ram_size),
-        .ram_start(ram_start)
-        ) memory_interface(
-        .clk(clk),
-        .reset(reset),
-        .fetch_address(memory_interface_fetch_address),
-        .fetch_data(memory_interface_fetch_data),
-        .fetch_valid(memory_interface_fetch_valid),
-        .rw_address(memory_interface_rw_address),
-        .rw_byte_mask(memory_interface_rw_byte_mask),
-        .rw_read_not_write(memory_interface_rw_read_not_write),
-        .rw_active(memory_interface_rw_active),
-        .rw_data_in(memory_interface_rw_data_in),
-        .rw_data_out(memory_interface_rw_data_out),
-        .rw_address_valid(memory_interface_rw_address_valid),
-        .rw_wait(memory_interface_rw_wait),
-        .tty_write(tty_write),
-        .tty_write_data(tty_write_data),
-        .tty_write_busy(tty_write_busy),
-        .switch_2(switch_2),
-        .switch_3(switch_3),
-        .led_1(led_1),
-        .led_3(led_3)
-        );
-
-    wire `fetch_action fetch_action;
-    wire [31:0] fetch_target_pc;
-    wire [31:0] fetch_output_pc;
-    wire [31:0] fetch_output_instruction;
-    wire `fetch_output_state fetch_output_state;
-
-    cpu_fetch_stage #(
-        .reset_vector(reset_vector),
-        .mtvec(mtvec)
-        ) fetch_stage(
-        .clk(clk),
-        .reset(reset),
-        .memory_interface_fetch_address(memory_interface_fetch_address),
-        .memory_interface_fetch_data(memory_interface_fetch_data),
-        .memory_interface_fetch_valid(memory_interface_fetch_valid),
-        .fetch_action(fetch_action),
-        .target_pc(fetch_target_pc),
-        .output_pc(fetch_output_pc),
-        .output_instruction(fetch_output_instruction),
-        .output_state(fetch_output_state)
-        );
-
-    wire [6:0] decoder_funct7;
-    wire [2:0] decoder_funct3;
-    wire [4:0] decoder_rd;
-    wire [4:0] decoder_rs1;
-    wire [4:0] decoder_rs2;
-    wire [31:0] decoder_immediate;
-    wire [6:0] decoder_opcode;
-    wire `decode_action decode_action;
-
-    cpu_decoder decoder(
-        .instruction(fetch_output_instruction),
-        .funct7(decoder_funct7),
-        .funct3(decoder_funct3),
-        .rd(decoder_rd),
-        .rs1(decoder_rs1),
-        .rs2(decoder_rs2),
-        .immediate(decoder_immediate),
-        .opcode(decoder_opcode),
-        .decode_action(decode_action));
-
-    wire [31:0] register_rs1 = (decoder_rs1 == 0) ? 0 : registers[decoder_rs1];
-    wire [31:0] register_rs2 = (decoder_rs2 == 0) ? 0 : registers[decoder_rs2];
-
-    wire [31:0] load_store_address = decoder_immediate + register_rs1;
-
-    wire [1:0] load_store_address_low_2 = decoder_immediate[1:0] + register_rs1[1:0];
-
-    function get_load_store_misaligned(
-        input [2:0] funct3,
-        input [1:0] load_store_address_low_2
-        );
-    begin
-        case(funct3[1:0])
-        `funct3_sb:
-            get_load_store_misaligned = 0;
-        `funct3_sh:
-            get_load_store_misaligned = load_store_address_low_2[0] != 0;
-        `funct3_sw:
-            get_load_store_misaligned = load_store_address_low_2[1:0] != 0;
-        default:
-            get_load_store_misaligned = 1'bX;
-        endcase
-    end
-    endfunction
-
-    wire load_store_misaligned = get_load_store_misaligned(decoder_funct3, load_store_address_low_2);
-
-    assign memory_interface_rw_address = load_store_address[31:2];
-
-    wire [3:0] unshifted_load_store_byte_mask = {decoder_funct3[1] ? 2'b11 : 2'b00, (decoder_funct3[1] | decoder_funct3[0]) ? 1'b1 : 1'b0, 1'b1};
-
-    assign memory_interface_rw_byte_mask = unshifted_load_store_byte_mask << load_store_address_low_2;
-
-    assign memory_interface_rw_data_in[31:24] = load_store_address_low_2[1]
-                                                ? (load_store_address_low_2[0] ? register_rs2[7:0] : register_rs2[15:8])
-                                                : (load_store_address_low_2[0] ? register_rs2[23:16] : register_rs2[31:24]);
-    assign memory_interface_rw_data_in[23:16] = load_store_address_low_2[1] ? register_rs2[7:0] : register_rs2[23:16];
-    assign memory_interface_rw_data_in[15:8] = load_store_address_low_2[0] ? register_rs2[7:0] : register_rs2[15:8];
-    assign memory_interface_rw_data_in[7:0] = register_rs2[7:0];
-
-    wire [31:0] unmasked_loaded_value;
-
-    assign unmasked_loaded_value[7:0] = load_store_address_low_2[1]
-                                        ? (load_store_address_low_2[0] ? memory_interface_rw_data_out[31:24] : memory_interface_rw_data_out[23:16])
-                                        : (load_store_address_low_2[0] ? memory_interface_rw_data_out[15:8] : memory_interface_rw_data_out[7:0]);
-    assign unmasked_loaded_value[15:8] = load_store_address_low_2[1] ? memory_interface_rw_data_out[31:24] : memory_interface_rw_data_out[15:8];
-    assign unmasked_loaded_value[31:16] = memory_interface_rw_data_out[31:16];
-
-    wire [31:0] loaded_value;
-
-    assign loaded_value[7:0] = unmasked_loaded_value[7:0];
-    assign loaded_value[15:8] = decoder_funct3[1:0] == 0 ? ({8{~decoder_funct3[2] & unmasked_loaded_value[7]}}) : unmasked_loaded_value[15:8];
-    assign loaded_value[31:16] = decoder_funct3[1] == 0 ? ({16{~decoder_funct3[2] & (decoder_funct3[0] ? unmasked_loaded_value[15] : unmasked_loaded_value[7])}}) : unmasked_loaded_value[31:16];
-
-    assign memory_interface_rw_active = ~reset
-                                        & (fetch_output_state == `fetch_output_state_valid)
-                                        & ~load_store_misaligned
-                                        & ((decode_action & (`decode_action_load | `decode_action_store)) != 0);
-
-    assign memory_interface_rw_read_not_write = ~decoder_opcode[5];
-
-    wire [31:0] alu_a = register_rs1;
-    wire [31:0] alu_b = decoder_opcode[5] ? register_rs2 : decoder_immediate;
-    wire [31:0] alu_result;
 
     cpu_alu alu(
         .funct7(decoder_funct7),
@@ -649,7 +700,7 @@ module cpu(
         endcase
     end
     endfunction
-    
+
     assign csr_op_is_valid = get_csr_op_is_valid(csr_number, csr_reads, csr_writes);
 
     wire [63:0] cycle_counter = 0; // TODO: implement cycle_counter