add core
[shakti-core.git] / src / core / alu.bsv
1 /*
2 Copyright (c) 2013, IIT Madras
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
13
14 Module name: Riscv_arithmetic_unit.
15 author name: Neel Gala
16 Email id: neelgala@gmail.com
17
18 This module is the arithmetic execution unit for the RISCV ISA. It is a 64 bit implementation which is named as RV64.
19 Instructions with a "W" suffix are RV64 instructions that ignore the upper 32 bits of their operands and operate on the lower 32 bits.
20 The arithmetic unit is implemented as a single case statement where the instruction bits define the various operations to be executed.
21
22 This module contains single cycle MUL instruction execution.
23
24 */
25
26 package alu;
27
28 import defined_types::*;
29 `include "defined_parameters.bsv"
30 `include "decode.defines"
// fn_alu: purely combinational execute-stage function. It computes the
// arithmetic / logic / shift / compare result selected by fn, resolves
// branches and jumps, and packages everything for the next stage.
//
// Parameters:
//   fn              : 4-bit operation select, compared against the FN* macros.
//                     fn[3] additionally selects subtraction and sign-filling
//                     right shifts; fn[0] inverts the comparison result.
//   op1, op2        : 64-bit operands.
//   immediate_value : added to pc to form the BRANCH/JAL target; forwarded as
//                     memory_data for memory operations; bits [16:12] and
//                     [11:0] become rs1_addr and csr_address for SYSTEM_INSTR.
//   pc              : address of the instruction being executed.
//   inst_type       : instruction class steering target/result selection.
//   npc             : compared with the resolved target to detect a
//                     misprediction (presumably the pc fetched after this
//                     instruction -- confirm against the caller).
//   funct3          : supplies transfer_size / signextend for memory
//                     operations and is forwarded for SYSTEM_INSTR.
//   mem_access      : memory access kind; Atomic makes op1 the address.
//   rd              : destination register index, used for the predictor
//                     training filter and the RAS push decision.
//   prediction      : 2-bit predictor state that the updated training state
//                     is derived from.
//   perfmonitors    : performance-monitor event bits; returned with the
//                     branch-related events set.
//   word32          : (declared only when RV64 is defined) operate on the
//                     lower 32 bits and sign-extend the 32-bit result.
//                     NOTE(review): the body uses word32 unconditionally, so
//                     it appears to require RV64 to be defined -- confirm.
//
// Returns tuple7(result, effective_address, flush, training_data, ras_push,
//                exception, perfmonitors).
(*noinline*)
function Tuple7#(Execution_output, Bit#(`VADDR), Flush_type, Maybe#(Training_data#(`VADDR)),Maybe#(Bit#(`VADDR)), Trap_type, Bit#(`PERFMONITORS)) fn_alu(Bit#(4) fn, Bit#(64) op1, Bit#(64) op2, Bit#(64) immediate_value, Bit#(`VADDR) pc ,
    Instruction_type inst_type, Bit#(`VADDR) npc, Bit#(3) funct3, Access_type mem_access, Bit#(5) rd, Bit#(2) prediction,
    Bit#(`PERFMONITORS) perfmonitors
    `ifdef RV64 ,Bool word32 `endif );
  // TODO: use the pc of the previous stage for next-pc. This will save space in the FIFOs
  // But what if the instruction in the previous stage has not yet been enqueued (in cases of page/cache misses)
  // In this case you will have to wait until that instruction arrives before progressing. NEED TO THINK THIS THROUGH
  /*========= Perform all the arithmetic ===== */
  // ADD* ADDI* SUB*
  // With fn[3] set, op2 is inverted and fn[3] is added as carry-in:
  // op1 + ~op2 + 1 == op1 - op2. The same adder therefore serves ADD and SUB.
  let inv_op2=(fn[3]==1)?~op2:op2;
  let op1_xor_op2=op1^inv_op2;
  let adder_output=op1+inv_op2+zeroExtend(fn[3]);
  // SLT SLTU
  // fn[3]==0 : equality test (inv_op2 is op2 here, so the XOR is zero iff equal).
  // otherwise: less-than. If the sign bits agree, the sign of op1-op2 decides;
  //            if they differ, fn[1] picks op2's sign bit (unsigned compare)
  //            or op1's sign bit (signed compare).
  // fn[0] inverts the outcome.
  Bit#(1) compare_out=fn[0]^(
      (fn[3]==0)?pack(op1_xor_op2==0):
      (op1[64-1]==op2[64-1])?adder_output[64-1]:
      (fn[1]==1)?op2[64-1]:op1[64-1]);
  // SLL SRL SRA
  // Only a right shifter is built: a left shift bit-reverses the input,
  // shifts right, and bit-reverses the result.
  // 32-bit operations use a 5-bit shift amount and replace the upper word of
  // the operand with the fill bit fn[3]&op1[31].
  Bit#(6) shift_amt={((!word32)?op2[5]:0),op2[4:0]};
  Bit#(32) upper_bits=word32?signExtend(fn[3]&op1[31]):op1[63:32];
  Bit#(64) shift_inright={upper_bits,op1[31:0]};
  let shin = (fn==`FNSR || fn==`FNSRA)?shift_inright:reverseBits(shift_inright);
  // 65-bit signed value: the extra top bit is the operand's sign only when
  // fn[3] is set, so the signed >> sign-fills only in that case.
  Int#(TAdd#(64,1)) t=unpack({(fn[3]&shin[64-1]),shin});
  Int#(64) shift_r=unpack(pack(t>>shift_amt)[64-1:0]);
  let shift_l=reverseBits(pack(shift_r));
  Bit#(64) shift_output=((fn==`FNSR || fn==`FNSRA)?pack(shift_r):0) |
                        ((fn==`FNSL)? pack(shift_l):0);
  // AND OR XOR
  // OR enables both terms: (a^b)|(a&b) == a|b.
  let logic_output= ((fn==`FNXOR || fn==`FNOR)?op1_xor_op2:0) |
                    ((fn==`FNOR || fn==`FNAND)?op1&op2:0);
  // The compare bit is gated in only for the comparison operations; the
  // unselected units contribute zero, so the results can simply be ORed.
  let shift_logic=zeroExtend(pack(fn==`FNSEQ || fn==`FNSNE || fn >= `FNSLT)&compare_out) |
                  logic_output|shift_output;
  Bit#(64) final_output = (fn==`FNADD || fn==`FNSUB)?adder_output:shift_logic;
  if(word32)
    final_output=signExtend(final_output[31:0]);
  if(inst_type==MEMORY && mem_access==Atomic) // TODO see if this can be avoided
    final_output=op1; // atomic accesses use op1 unmodified as the address
  /*============================================ */
  /*====== generate the effective address to jump to ====== */
  Bit#(`VADDR) branch_address=truncate(immediate_value)+pc;
  Bit#(`VADDR) next_pc=pc+4;
  Bit#(`VADDR) effective_address=0;
  // Updated 2-bit predictor state: jumps force 'b11; otherwise the state
  // saturates upward when bit 0 of the result is 1 and downward when it is 0.
  Bit#(2) new_state=prediction;
  if(inst_type==JAL || inst_type==JALR)
    new_state='b11;
  else if(final_output[0]==1)begin
    if(new_state<3)
      new_state=new_state+1;
  end
  else begin
    if(new_state>0)
      new_state=new_state-1;
  end
  Training_data#(`VADDR) bp_train = Training_data{pc:pc,branch_address:branch_address,state:new_state};
  Maybe#(Training_data#(`VADDR)) training_data=tagged Invalid;
  Maybe#(Bit#(`VADDR)) ras_push=tagged Invalid;

  if(inst_type==BRANCH && final_output[0]==1)
    perfmonitors[`COND_BRANCH_TAKEN]=1;

  // Taken BRANCH and JAL go to pc+immediate; FENCEI and not-taken BRANCH go
  // to pc+4; every other instruction type (including JALR) uses the ALU
  // result as the target and as the trained branch address.
  // NOTE(review): the RISC-V spec defines the JALR target with bit 0
  // cleared; here the ALU result is used as-is -- confirm the LSB is handled
  // elsewhere.
  if((inst_type==BRANCH && final_output[0]==1) || inst_type==JAL)
    effective_address=branch_address;
  else if(inst_type==FENCEI || (inst_type==BRANCH && final_output[0]==0))
    effective_address=next_pc;
  else begin
    effective_address=truncate(final_output);
    bp_train.branch_address=truncate(final_output);
  end
  // Jumps return pc+4 as their result value.
  if(inst_type==JAL || inst_type==JALR)
    final_output=signExtend(next_pc);
  `ifdef simulate
  // Simulation builds report a zero result for BRANCH instructions.
  if(inst_type==BRANCH)
    final_output=0;
  `endif
  /*======================================================== */
  /*==== Generate flush if prediction was wrong or FENCEI ========== */
  // Train the predictor on BRANCH, JAL, and on JALR whose rd is neither x5
  // nor x1. The previous test "rd != 'b00101 || rd != 'b00001" held for every
  // rd (no value equals both constants), so the rd filter never took effect;
  // the two inequalities must be ANDed for it to exclude x1 and x5.
  if(inst_type==BRANCH || inst_type==JAL || ((rd != 'b00101 && rd!='b00001) && inst_type==JALR))
    training_data=tagged Valid bp_train;
  Flush_type flush=None;
  // A control-transfer whose resolved target differs from npc flushes the
  // pipeline; FENCEI requests its own flush type.
  if((inst_type==BRANCH || inst_type==JAL || inst_type==JALR) && effective_address!=npc)begin
    if(inst_type==BRANCH)
      perfmonitors[`COND_BRANCH_MISPREDICTED]=1;
    flush=AccessFlush;
  end
  else if(inst_type==FENCEI)
    flush=Fence;
  // Push pc+4 on the return-address stack when a jump writes x1 or x5.
  if((inst_type==JAL||inst_type==JALR) &&& rd matches 'b00?01) // TODO put on RAS only if rd = ra
    ras_push=tagged Valid next_pc;
  /*================================================================ */
  Trap_type exception=tagged None;
  // Only bit 1 of the jump target is examined for misalignment.
  if((inst_type==JALR || inst_type==JAL) && effective_address[1]!=0)
    exception=tagged Exception Inst_addr_misaligned;
  // Package the result according to the instruction class.
  Execution_output result;
  if(inst_type==MEMORY || inst_type==FENCE || inst_type == FENCEI)begin
    result= tagged MEMORY (Memout{
      address:final_output,
      memory_data:immediate_value,
      transfer_size:zeroExtend(funct3[1:0]),
      signextend:~funct3[2],
      mem_type:(inst_type==FENCE || inst_type==FENCEI)?Fence:mem_access
      `ifdef atomic ,atomic_op:{pack(word32),fn} `endif });
  end
  else if(inst_type==SYSTEM_INSTR)begin
    result=tagged SYSTEM (CSRInputs{rs1:op1,rs2:op2,rs1_addr:immediate_value[16:12],funct3:funct3,csr_address:immediate_value[11:0]});
  end
  else
    result=tagged RESULT (Arithout{aluresult:final_output,fflags:0});
  return tuple7(result,effective_address,flush, training_data,ras_push,exception,perfmonitors);
endfunction
141
142 // module mkTb(Empty);
143 //
144 // rule test_alu;
145 // Bit#(64) op1='h8000000000004123;
146 // Int#(64) in1=unpack(op1);
147 // Bit#(64) op2='h8000000000004123;
148 // let {x,ea,flush}<-fn_alu(`FNSNE,op1,op2,'d0,'d0,BRANCH,'h800,'d3,Load,False);
149 // //$display("Output is: :%h Excepted: %h",x,(op1!=op2));
150 // $finish(0);
151 // endrule
152 // endmodule
153
154 endpackage