2 Copyright (c) 2013, IIT Madras
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
14 Module name: Riscv_arithmetic_unit.
15 author name: Neel Gala
16 Email id: neelgala@gmail.com
18 This module is the arithmetic execution unit for the RISC-V ISA. It is a 64-bit implementation, referred to as RV64.
19 Instructions with a "W" suffix are RV64 instructions that ignore the upper 32 bits of their operands and operate only on the lower 32 bits.
20 The arithmetic unit is implemented as a single case statement where the instruction bits define the various operations to be executed.
22 This module contains single cycle MUL instruction execution.
28 import defined_types::*;
29 `include "core_parameters.bsv"
30 `include "decode.defines"
// fn_alu: ALU and control-transfer resolution function.
//
// From the operation selector, the operands and the pc it derives:
//   * final_output      - adder / compare / shift / logic result (or pc+4 for JAL/JALR),
//   * effective_address - branch_address, pc+4, or the truncated ALU result,
//   * training_data     - branch-predictor training record (pc, target, 2-bit state),
//   * ras_push          - pc+4 to push when JAL/JALR writes rd matching 'b00?01,
//   * exception         - Inst_addr_misaligned when a JAL/JALR target has bit 1 set,
//   * result            - Execution_output tagged MEMORY, SYSTEM or RESULT.
//
// Parameters:
//   fn              - 4-bit operation selector, compared against the `FN* macros.
//                     fn[3] selects op2 inversion (+1 carry-in) and the shift fill bit;
//                     fn[0] inverts the compare result; fn[1] picks which operand's
//                     sign bit decides a compare when the operand signs differ.
//   op1, op2        - 64-bit operands.
//   immediate_value - added to pc to form branch_address; also forwarded as
//                     memory_data (MEMORY) and sliced into rs1_addr/csr_address (SYSTEM).
//   pc              - used to form branch_address (pc+immediate) and next_pc (pc+4).
//   inst_type       - instruction class; BRANCH, JAL, JALR, MEMORY, FENCE, FENCEI and
//                     SYSTEM_INSTR are the values tested here.
//   npc             - compared with effective_address to detect a target mismatch
//                     (presumably the predicted next pc - confirm with the caller).
//   funct3          - funct3[1:0] becomes transfer_size, ~funct3[2] becomes signextend;
//                     passed through unchanged for SYSTEM.
//   mem_access      - memory access kind; tested against Atomic and used as mem_type
//                     for non-fence memory operations.
//   rd              - destination register index, used for training / RAS decisions.
//   prediction      - incoming 2-bit predictor state.
//   perfmonitors    - performance-event bits; `COND_BRANCH_TAKEN and
//                     `COND_BRANCH_MISPREDICTED may be set here.
//   word32          - (only when RV64 is defined) selects 32-bit operation handling.
//
// Returns: tuple7(result, effective_address, flush, training_data, ras_push,
//                 exception, perfmonitors).
//
// NOTE(review): word32 is a parameter only under `ifdef RV64, but the shift logic
// below uses it without a visible guard - confirm RV64 is always defined here.
32 function Tuple7#(Execution_output, Bit#(`VADDR), Flush_type, Maybe#(Training_data#(`VADDR)),Maybe#(Bit#(`VADDR)), Trap_type, Bit#(`PERFMONITORS)) fn_alu(Bit#(4) fn, Bit#(64) op1, Bit#(64) op2, Bit#(64) immediate_value, Bit#(`VADDR) pc ,
33 Instruction_type inst_type, Bit#(`VADDR) npc, Bit#(3) funct3, Access_type mem_access, Bit#(5) rd, Bit#(2) prediction,
34 Bit#(`PERFMONITORS) perfmonitors
35 `ifdef RV64 ,Bool word32 `endif );
36 // TODO: use the pc of the previous stage for next-pc. This will save space in the FIFOs
37 // But what if the instruction in the previous stage has not yet been enqueued (in cases of page/cache misses)
38 // In this case you will have to wait until that instruction arrives before progressing. NEED TO THINK THIS THROUGH
39 /*========= Perform all the arithmetic ===== */
// Adder: when fn[3]==1, op2 is inverted and a carry-in of 1 is added, giving
// op1 - op2 in two's complement; when fn[3]==0 the result is op1 + op2.
41 let inv_op2=(fn[3]==1)?~op2:op2;
42 let op1_xor_op2=op1^inv_op2;
43 let adder_output=op1+inv_op2+zeroExtend(fn[3]);
// Comparator (1-bit result, inverted when fn[0]==1):
//   fn[3]==0            -> equality, i.e. op1 ^ op2 is all zeros;
//   sign bits equal     -> sign bit of the subtraction result;
//   sign bits differ    -> op2's sign bit if fn[1]==1, otherwise op1's sign bit
//                          (NOTE(review): looks like unsigned vs signed less-than -
//                          confirm against the `FN* encodings).
45 Bit#(1) compare_out=fn[0]^(
46 (fn[3]==0)?pack(op1_xor_op2==0):
47 (op1[64-1]==op2[64-1])?adder_output[64-1]:
48 (fn[1]==1)?op2[64-1]:op1[64-1]);
// Shifter: the shift amount is op2[4:0], extended with op2[5] unless word32 is set.
50 Bit#(6) shift_amt={((!word32)?op2[5]:0),op2[4:0]};
// For word32 the upper half of the shift input is 32 copies of (fn[3] & op1[31]);
// otherwise it is op1[63:32].
51 Bit#(32) upper_bits=word32?signExtend(fn[3]&op1[31]):op1[63:32];
52 Bit#(64) shift_inright={upper_bits,op1[31:0]};
// Only a right shifter is built: for anything other than FNSR/FNSRA the input is
// bit-reversed, shifted right, and reversed again (shift_l) to obtain a left shift.
53 let shin = (fn==`FNSR || fn==`FNSRA)?shift_inright:reverseBits(shift_inright);
// Prepend (fn[3] & msb) as a 65th bit and shift as a signed Int so that this bit
// is what gets shifted in from the left; the extra bit is dropped afterwards.
54 Int#(TAdd#(64,1)) t=unpack({(fn[3]&shin[64-1]),shin});
55 Int#(64) shift_r=unpack(pack(t>>shift_amt)[64-1:0]);
56 let shift_l=reverseBits(pack(shift_r));
// shift_output is zero unless fn is FNSR, FNSRA or FNSL.
57 Bit#(64) shift_output=((fn==`FNSR || fn==`FNSRA)?pack(shift_r):0) |
58 ((fn==`FNSL)? pack(shift_l):0);
// Logic unit: the XOR term is enabled for FNXOR/FNOR and the AND term for
// FNOR/FNAND, so FNOR yields (xor | and).
60 let logic_output= ((fn==`FNXOR || fn==`FNOR)?op1_xor_op2:0) |
61 ((fn==`FNOR || fn==`FNAND)?op1&op2:0);
// Merge the gated compare bit (zero-extended) with the logic and shift results.
62 let shift_logic=zeroExtend(pack(fn==`FNSEQ || fn==`FNSNE || fn >= `FNSLT)&compare_out) |
63 logic_output|shift_output;
// FNADD/FNSUB take the adder result; every other fn takes shift_logic.
64 Bit#(64) final_output = (fn==`FNADD || fn==`FNSUB)?adder_output:shift_logic;
// Replace the result with its low 32 bits sign-extended to 64 bits.
66 final_output=signExtend(final_output[31:0]);
67 if(inst_type==MEMORY && mem_access==Atomic) // TODO see if this can be avoided
69 /*============================================ */
70 /*====== generate the effective address to jump to ====== */
// Candidate targets: pc + (truncated) immediate, and the sequential pc + 4.
71 Bit#(`VADDR) branch_address=truncate(immediate_value)+pc;
72 Bit#(`VADDR) next_pc=pc+4;
73 Bit#(`VADDR) effective_address=0;
// new_state starts from the incoming predictor state and is adjusted by +1 / -1
// below; final_output[0] carries the compare result.
74 Bit#(2) new_state=prediction;
75 if(inst_type==JAL || inst_type==JALR)
77 else if(final_output[0]==1)begin
79 new_state=new_state+1;
83 new_state=new_state-1;
// Training record for the branch predictor; outputs default to Invalid.
85 Training_data#(`VADDR) bp_train = Training_data{pc:pc,branch_address:branch_address,state:new_state};
86 Maybe#(Training_data#(`VADDR)) training_data=tagged Invalid;
87 Maybe#(Bit#(`VADDR)) ras_push=tagged Invalid;
// Performance event: BRANCH whose compare result is 1.
89 if(inst_type==BRANCH && final_output[0]==1)
90 perfmonitors[`COND_BRANCH_TAKEN]=1;
// Target selection: BRANCH with compare result 1, or JAL -> branch_address;
// FENCEI, or BRANCH with compare result 0 -> next_pc.
92 if((inst_type==BRANCH && final_output[0]==1) || inst_type==JAL)
93 effective_address=branch_address;
94 else if(inst_type==FENCEI || (inst_type==BRANCH && final_output[0]==0))
95 effective_address=next_pc;
// Here the target is the truncated ALU result, and the training target is
// overwritten with the same value.
97 effective_address=truncate(final_output);
98 bp_train.branch_address=truncate(final_output);
// JAL/JALR: the value carried in final_output becomes next_pc (pc+4).
100 if(inst_type==JAL || inst_type==JALR)
101 final_output=signExtend(next_pc);
103 if(inst_type==BRANCH)
106 /*======================================================== */
107 /*==== Generate flush if prediction was wrong or FENCEI ========== */
// NOTE(review): (rd != 'b00101 || rd != 'b00001) is true for every rd, because rd
// cannot equal both constants at once. As written, every JALR therefore produces
// training data. Confirm whether && (exclude rd == 1 and rd == 5) was intended.
108 if(inst_type==BRANCH || inst_type==JAL || ((rd != 'b00101 || rd!='b00001) && inst_type==JALR))
109 training_data=tagged Valid bp_train;
// flush defaults to None. A BRANCH/JAL/JALR whose computed target differs from npc
// enters the mismatch block, where BRANCH also raises the mispredict event bit.
110 Flush_type flush=None;
111 if((inst_type==BRANCH || inst_type==JAL || inst_type==JALR) && effective_address!=npc)begin
112 if(inst_type==BRANCH)
113 perfmonitors[`COND_BRANCH_MISPREDICTED]=1;
116 else if(inst_type==FENCEI)
// 'b00?01 matches rd == 'b00001 and rd == 'b00101; in those cases pc+4 is
// offered for pushing via ras_push.
118 if((inst_type==JAL||inst_type==JALR) &&& rd matches 'b00?01) // TODO put on RAS only if rd = ra
119 ras_push=tagged Valid next_pc;
120 /*================================================================ */
// JAL/JALR targets with bit 1 set raise Inst_addr_misaligned.
121 Trap_type exception=tagged None;
122 if((inst_type==JALR || inst_type==JAL) && effective_address[1]!=0)
123 exception=tagged Exception Inst_addr_misaligned;
// Package the result by instruction class.
124 Execution_output result;
// MEMORY / FENCE / FENCEI: final_output is the address, immediate_value the data;
// FENCE and FENCEI are reported with mem_type Fence, everything else with mem_access.
125 if(inst_type==MEMORY || inst_type==FENCE || inst_type == FENCEI)begin
126 result= tagged MEMORY (Memout{
127 address:final_output,
128 memory_data:immediate_value,
129 transfer_size:zeroExtend(funct3[1:0]),
130 signextend:~funct3[2],
131 mem_type:(inst_type==FENCE || inst_type==FENCEI)?Fence:mem_access
132 `ifdef atomic ,atomic_op:{pack(word32),fn} `endif });
// SYSTEM: operands are forwarded; rs1_addr and csr_address are slices of immediate_value.
134 else if(inst_type==SYSTEM_INSTR)begin
135 result=tagged SYSTEM (CSRInputs{rs1:op1,rs2:op2,rs1_addr:immediate_value[16:12],funct3:funct3,csr_address:immediate_value[11:0]});
// Plain ALU result with fflags cleared.
138 result=tagged RESULT (Arithout{aluresult:final_output,fflags:0});
139 return tuple7(result,effective_address,flush, training_data,ras_push,exception,perfmonitors);
142 // module mkTb(Empty);
145 // Bit#(64) op1='h8000000000004123;
146 // Int#(64) in1=unpack(op1);
147 // Bit#(64) op2='h8000000000004123;
148 // let {x,ea,flush}<-fn_alu(`FNSNE,op1,op2,'d0,'d0,BRANCH,'h800,'d3,Load,False);
149 // //$display("Output is: :%h Expected: %h",x,(op1!=op2));