X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=cpu.py;h=4e769f9b600610704959e9ad8e69cc5b1083bf44;hb=daafe882cb591c8e3a3a04f74561cc941cde52f7;hp=98e888ab20fbe2f5edf9c86179340158b13e8999;hpb=2ab5b48210e064bc87131bd6e1451ff89c064931;p=rv32.git diff --git a/cpu.py b/cpu.py index 98e888a..4e769f9 100644 --- a/cpu.py +++ b/cpu.py @@ -28,17 +28,58 @@ from migen import * from migen.fhdl import verilog +from migen.fhdl.structure import _Operator from riscvdefs import * from cpudefs import * +class MemoryInterface: + fetch_address = Signal(32, name="memory_interface_fetch_address") # XXX [2:] + fetch_data = Signal(32, name="memory_interface_fetch_data") + fetch_valid = Signal(name="memory_interface_fetch_valid") + rw_address= Signal(32, name="memory_interface_rw_address") # XXX [2:] + rw_byte_mask = Signal(4, name="memory_interface_rw_byte_mask") + rw_read_not_write = Signal(name="memory_interface_rw_read_not_write") + rw_active = Signal(name="memory_interface_rw_active") + rw_data_in = Signal(32, name="memory_interface_rw_data_in") + rw_data_out = Signal(32, name="memory_interface_rw_data_out") + rw_address_valid = Signal(name="memory_interface_rw_address_valid") + rw_wait = Signal(name="memory_interface_rw_wait") + + +class Decoder: + funct7 = Signal(7, name="decoder_funct7") + funct3 = Signal(3, name="decoder_funct3") + rd = Signal(5, name="decoder_rd") + rs1 = Signal(5, name="decoder_rs1") + rs2 = Signal(5, name="decoder_rs2") + immediate = Signal(32, name="decoder_immediate") + opcode = Signal(7, name="decoder_opcode") + act = Signal(decode_action, name="decoder_action") + + class CPU(Module): - """ + """ """ + def get_ls_misaligned(self, ls, funct3, load_store_address_low_2): + return Case(funct3[:2], + { F3.sb: ls.eq(Constant(0)), + F3.sh: ls.eq(load_store_address_low_2[0] != 0), + F3.sw: ls.eq(load_store_address_low_2[0:2] != Constant(0, 2)), + "default": ls.eq(Constant(1)) + }) + + def get_lsbm(self, dc): + return Cat(Constant(1), + Mux((dc.funct3[1] | 
dc.funct3[0]), + Constant(1), Constant(0)), + Mux((dc.funct3[1]), + Constant(0b11, 2), Constant(0, 2))) + def __init__(self): - #self.clk = ClockSignal() - #self.reset = ResetSignal() + self.clk = ClockSignal() + self.reset = ResetSignal() self.tty_write = Signal() self.tty_write_data = Signal(8) self.tty_write_busy = Signal() @@ -58,39 +99,26 @@ class CPU(Module): l = [] for i in range(31): l.append(Signal(32, name="register%d" % i)) - self.registers = Array(l) - - #self.sync += self.registers[0].eq(0) - #self.sync += self.registers[1].eq(0) - - memory_interface_fetch_address = Signal(32)[2:] - memory_interface_fetch_data = Signal(32) - memory_interface_fetch_valid = Signal() - memory_interface_rw_address= Signal(32)[2:] - memory_interface_rw_byte_mask = Signal(4) - memory_interface_rw_read_not_write = Signal() - memory_interface_rw_active = Signal() - memory_interface_rw_data_in = Signal(32) - memory_interface_rw_data_out = Signal(32) - memory_interface_rw_address_valid = Signal() - memory_interface_rw_wait = Signal() - - mi = Instance("cpu_memory_interface", + registers = Array(l) + + mi = MemoryInterface() + + mii = Instance("cpu_memory_interface", name="memory_instance", p_ram_size = ram_size, p_ram_start = ram_start, i_clk=ClockSignal(), i_rst=ResetSignal(), - i_fetch_address = memory_interface_fetch_address, - o_fetch_data = memory_interface_fetch_data, - o_fetch_valid = memory_interface_fetch_valid, - i_rw_address = memory_interface_rw_address, - i_rw_byte_mask = memory_interface_rw_byte_mask, - i_rw_read_not_write = memory_interface_rw_read_not_write, - i_rw_active = memory_interface_rw_active, - i_rw_data_in = memory_interface_rw_data_in, - o_rw_data_out = memory_interface_rw_data_out, - o_rw_address_valid = memory_interface_rw_address_valid, - o_rw_wait = memory_interface_rw_wait, + i_fetch_address = mi.fetch_address, + o_fetch_data = mi.fetch_data, + o_fetch_valid = mi.fetch_valid, + i_rw_address = mi.rw_address, + i_rw_byte_mask = mi.rw_byte_mask, + 
i_rw_read_not_write = mi.rw_read_not_write, + i_rw_active = mi.rw_active, + i_rw_data_in = mi.rw_data_in, + o_rw_data_out = mi.rw_data_out, + o_rw_address_valid = mi.rw_address_valid, + o_rw_wait = mi.rw_wait, o_tty_write = self.tty_write, o_tty_write_data = self.tty_write_data, i_tty_write_busy = self.tty_write_busy, @@ -99,35 +127,142 @@ class CPU(Module): o_led_1 = self.led_1, o_led_3 = self.led_3 ) - self.specials += mi - -""" - cpu_memory_interface #( - .ram_size(ram_size), - .ram_start(ram_start) - ) memory_interface( - .clk(clk), - .reset(reset), - .fetch_address(memory_interface_fetch_address), - .fetch_data(memory_interface_fetch_data), - .fetch_valid(memory_interface_fetch_valid), - .rw_address(memory_interface_rw_address), - .rw_byte_mask(memory_interface_rw_byte_mask), - .rw_read_not_write(memory_interface_rw_read_not_write), - .rw_active(memory_interface_rw_active), - .rw_data_in(memory_interface_rw_data_in), - .rw_data_out(memory_interface_rw_data_out), - .rw_address_valid(memory_interface_rw_address_valid), - .rw_wait(memory_interface_rw_wait), - .tty_write(tty_write), - .tty_write_data(tty_write_data), - .tty_write_busy(tty_write_busy), - .switch_2(switch_2), - .switch_3(switch_3), - .led_1(led_1), - .led_3(led_3) - ); -""" + self.specials += mii + + fetch_act = Signal(fetch_action) + fetch_target_pc = Signal(32) + fetch_output_pc = Signal(32) + fetch_output_instruction = Signal(32) + fetch_output_st = Signal(fetch_output_state) + + fs = Instance("CPUFetchStage", name="fetch_stage", + i_clk=ClockSignal(), + i_rst=ResetSignal(), + o_memory_interface_fetch_address = mi.fetch_address, + i_memory_interface_fetch_data = mi.fetch_data, + i_memory_interface_fetch_valid = mi.fetch_valid, + i_fetch_action = fetch_act, + i_target_pc = fetch_target_pc, + o_output_pc = fetch_output_pc, + o_output_instruction = fetch_output_instruction, + o_output_state = fetch_output_st, + i_reset_vector = reset_vector, + i_mtvec = mtvec, + ) + self.specials += fs + + dc = 
Decoder() + + cd = Instance("CPUDecoder", name="decoder", + i_instruction = fetch_output_instruction, + o_funct7 = dc.funct7, + o_funct3 = dc.funct3, + o_rd = dc.rd, + o_rs1 = dc.rs1, + o_rs2 = dc.rs2, + o_immediate = dc.immediate, + o_opcode = dc.opcode, + o_decode_action = dc.act + ) + self.specials += cd + + register_rs1 = Signal(32) + register_rs2 = Signal(32) + self.comb += If(dc.rs1 == 0, + register_rs1.eq(0) + ).Else( + register_rs1.eq(registers[dc.rs1-1])) + self.comb += If(dc.rs2 == 0, + register_rs2.eq(0) + ).Else( + register_rs2.eq(registers[dc.rs2-1])) + + load_store_address = Signal(32) + load_store_address_low_2 = Signal(2) + + self.comb += load_store_address.eq(dc.immediate + register_rs1) + self.comb += load_store_address_low_2.eq( + dc.immediate[:2] + register_rs1[:2]) + + load_store_misaligned = Signal() + + lsa = self.get_ls_misaligned(load_store_misaligned, dc.funct3, + load_store_address_low_2) + self.comb += lsa + + # XXX rwaddr not 31:2 any more + self.comb += mi.rw_address.eq(load_store_address[2:]) + + unshifted_load_store_byte_mask = Signal(4) + + self.comb += unshifted_load_store_byte_mask.eq(self.get_lsbm(dc)) + + # XXX yuck. 
this will cause migen simulation to fail + # (however conversion to verilog works) + self.comb += mi.rw_byte_mask.eq( + _Operator("<<", [unshifted_load_store_byte_mask, + load_store_address_low_2])) + + # XXX not obvious: copy the store data from register_rs2 onto the byte lanes selected by load_store_address_low_2 + b3 = Mux(load_store_address_low_2[1], + Mux(load_store_address_low_2[0], register_rs2[0:8], + register_rs2[8:16]), + Mux(load_store_address_low_2[0], register_rs2[16:24], + register_rs2[24:32])) + b2 = Mux(load_store_address_low_2[1], register_rs2[0:8], + register_rs2[16:24]) + b1 = Mux(load_store_address_low_2[0], register_rs2[0:8], + register_rs2[8:16]) + b0 = register_rs2[0:8] + + self.comb += mi.rw_data_in.eq(Cat(b0, b1, b2, b3)) + + # XXX not obvious: move the addressed byte lanes of rw_data_out down to the low bits. python slices are [lo:hi), so verilog [15:8] is [8:16] and verilog [31:24] is [24:32] + unmasked_loaded_value = Signal(32) + + b0 = Mux(load_store_address_low_2[1], + Mux(load_store_address_low_2[0], mi.rw_data_out[24:32], + mi.rw_data_out[16:24]), + Mux(load_store_address_low_2[0], mi.rw_data_out[8:16], + mi.rw_data_out[0:8])) + b1 = Mux(load_store_address_low_2[1], mi.rw_data_out[24:32], + mi.rw_data_out[8:16]) + b23 = mi.rw_data_out[16:32] + + self.comb += unmasked_loaded_value.eq(Cat(b0, b1, b23)) + + # XXX not obvious: when funct3[2] is clear, byte and halfword loads are extended with the loaded sign bit, otherwise with zeros + loaded_value = Signal(32) + + b0 = unmasked_loaded_value[0:8] + b1 = Mux(dc.funct3[0:2] == 0, + Replicate(~dc.funct3[2] & unmasked_loaded_value[7], 8), + unmasked_loaded_value[8:16]) + b2 = Mux(dc.funct3[1] == 0, + Replicate(~dc.funct3[2] & + Mux(dc.funct3[0], unmasked_loaded_value[15], + unmasked_loaded_value[7]), + 16), + unmasked_loaded_value[16:32]) + + self.comb += loaded_value.eq(Cat(b0, b1, b2)) + + self.comb += mi.rw_active.eq(~self.reset + & (fetch_output_st == fetch_output_state_valid) + & ~load_store_misaligned + & ((dc.act & (DA.load | DA.store)) != 0)) + + self.comb += mi.rw_read_not_write.eq(~dc.opcode[5]) + + # alu + alu_a = Signal(32) + alu_b = Signal(32) + alu_result = Signal(32) + + self.comb += alu_a.eq(register_rs1) + self.comb += alu_b.eq(Mux(dc.opcode[5], + register_rs2, + dc.immediate)) if __name__ == "__main__": example = CPU() @@ 
-143,169 +278,6 @@ if __name__ == "__main__": })) """ -module cpu( - input clk, - input reset, - output tty_write, - output [7:0] tty_write_data, - input tty_write_busy, - input switch_2, - input switch_3, - output led_1, - output led_3 - ); - - parameter ram_size = 'h8000; - parameter ram_start = 32'h1_0000; - parameter reset_vector = ram_start; - parameter mtvec = ram_start + 'h40; - - reg [31:0] registers[31:1]; - - wire [31:2] memory_interface_fetch_address; - wire [31:0] memory_interface_fetch_data; - wire memory_interface_fetch_valid; - wire [31:2] memory_interface_rw_address; - wire [3:0] memory_interface_rw_byte_mask; - wire memory_interface_rw_read_not_write; - wire memory_interface_rw_active; - wire [31:0] memory_interface_rw_data_in; - wire [31:0] memory_interface_rw_data_out; - wire memory_interface_rw_address_valid; - wire memory_interface_rw_wait; - - cpu_memory_interface #( - .ram_size(ram_size), - .ram_start(ram_start) - ) memory_interface( - .clk(clk), - .reset(reset), - .fetch_address(memory_interface_fetch_address), - .fetch_data(memory_interface_fetch_data), - .fetch_valid(memory_interface_fetch_valid), - .rw_address(memory_interface_rw_address), - .rw_byte_mask(memory_interface_rw_byte_mask), - .rw_read_not_write(memory_interface_rw_read_not_write), - .rw_active(memory_interface_rw_active), - .rw_data_in(memory_interface_rw_data_in), - .rw_data_out(memory_interface_rw_data_out), - .rw_address_valid(memory_interface_rw_address_valid), - .rw_wait(memory_interface_rw_wait), - .tty_write(tty_write), - .tty_write_data(tty_write_data), - .tty_write_busy(tty_write_busy), - .switch_2(switch_2), - .switch_3(switch_3), - .led_1(led_1), - .led_3(led_3) - ); - - wire `fetch_action fetch_action; - wire [31:0] fetch_target_pc; - wire [31:0] fetch_output_pc; - wire [31:0] fetch_output_instruction; - wire `fetch_output_state fetch_output_state; - - cpu_fetch_stage #( - .reset_vector(reset_vector), - .mtvec(mtvec) - ) fetch_stage( - .clk(clk), - .reset(reset), 
- .memory_interface_fetch_address(memory_interface_fetch_address), - .memory_interface_fetch_data(memory_interface_fetch_data), - .memory_interface_fetch_valid(memory_interface_fetch_valid), - .fetch_action(fetch_action), - .target_pc(fetch_target_pc), - .output_pc(fetch_output_pc), - .output_instruction(fetch_output_instruction), - .output_state(fetch_output_state) - ); - - wire [6:0] decoder_funct7; - wire [2:0] decoder_funct3; - wire [4:0] decoder_rd; - wire [4:0] decoder_rs1; - wire [4:0] decoder_rs2; - wire [31:0] decoder_immediate; - wire [6:0] decoder_opcode; - wire `decode_action decode_action; - - cpu_decoder decoder( - .instruction(fetch_output_instruction), - .funct7(decoder_funct7), - .funct3(decoder_funct3), - .rd(decoder_rd), - .rs1(decoder_rs1), - .rs2(decoder_rs2), - .immediate(decoder_immediate), - .opcode(decoder_opcode), - .decode_action(decode_action)); - - wire [31:0] register_rs1 = (decoder_rs1 == 0) ? 0 : registers[decoder_rs1]; - wire [31:0] register_rs2 = (decoder_rs2 == 0) ? 0 : registers[decoder_rs2]; - - wire [31:0] load_store_address = decoder_immediate + register_rs1; - - wire [1:0] load_store_address_low_2 = decoder_immediate[1:0] + register_rs1[1:0]; - - function get_load_store_misaligned( - input [2:0] funct3, - input [1:0] load_store_address_low_2 - ); - begin - case(funct3[1:0]) - `funct3_sb: - get_load_store_misaligned = 0; - `funct3_sh: - get_load_store_misaligned = load_store_address_low_2[0] != 0; - `funct3_sw: - get_load_store_misaligned = load_store_address_low_2[1:0] != 0; - default: - get_load_store_misaligned = 1'bX; - endcase - end - endfunction - - wire load_store_misaligned = get_load_store_misaligned(decoder_funct3, load_store_address_low_2); - - assign memory_interface_rw_address = load_store_address[31:2]; - - wire [3:0] unshifted_load_store_byte_mask = {decoder_funct3[1] ? 2'b11 : 2'b00, (decoder_funct3[1] | decoder_funct3[0]) ? 
1'b1 : 1'b0, 1'b1}; - - assign memory_interface_rw_byte_mask = unshifted_load_store_byte_mask << load_store_address_low_2; - - assign memory_interface_rw_data_in[31:24] = load_store_address_low_2[1] - ? (load_store_address_low_2[0] ? register_rs2[7:0] : register_rs2[15:8]) - : (load_store_address_low_2[0] ? register_rs2[23:16] : register_rs2[31:24]); - assign memory_interface_rw_data_in[23:16] = load_store_address_low_2[1] ? register_rs2[7:0] : register_rs2[23:16]; - assign memory_interface_rw_data_in[15:8] = load_store_address_low_2[0] ? register_rs2[7:0] : register_rs2[15:8]; - assign memory_interface_rw_data_in[7:0] = register_rs2[7:0]; - - wire [31:0] unmasked_loaded_value; - - assign unmasked_loaded_value[7:0] = load_store_address_low_2[1] - ? (load_store_address_low_2[0] ? memory_interface_rw_data_out[31:24] : memory_interface_rw_data_out[23:16]) - : (load_store_address_low_2[0] ? memory_interface_rw_data_out[15:8] : memory_interface_rw_data_out[7:0]); - assign unmasked_loaded_value[15:8] = load_store_address_low_2[1] ? memory_interface_rw_data_out[31:24] : memory_interface_rw_data_out[15:8]; - assign unmasked_loaded_value[31:16] = memory_interface_rw_data_out[31:16]; - - wire [31:0] loaded_value; - - assign loaded_value[7:0] = unmasked_loaded_value[7:0]; - assign loaded_value[15:8] = decoder_funct3[1:0] == 0 ? ({8{~decoder_funct3[2] & unmasked_loaded_value[7]}}) : unmasked_loaded_value[15:8]; - assign loaded_value[31:16] = decoder_funct3[1] == 0 ? ({16{~decoder_funct3[2] & (decoder_funct3[0] ? unmasked_loaded_value[15] : unmasked_loaded_value[7])}}) : unmasked_loaded_value[31:16]; - - assign memory_interface_rw_active = ~reset - & (fetch_output_state == `fetch_output_state_valid) - & ~load_store_misaligned - & ((decode_action & (`decode_action_load | `decode_action_store)) != 0); - - assign memory_interface_rw_read_not_write = ~decoder_opcode[5]; - - wire [31:0] alu_a = register_rs1; - wire [31:0] alu_b = decoder_opcode[5] ? 
register_rs2 : decoder_immediate; - wire [31:0] alu_result; cpu_alu alu( .funct7(decoder_funct7), @@ -728,7 +700,7 @@ module cpu( endcase end endfunction - + assign csr_op_is_valid = get_csr_op_is_valid(csr_number, csr_reads, csr_writes); wire [63:0] cycle_counter = 0; // TODO: implement cycle_counter