src/core/alu.bsv

   1 /*
   2 Copyright (c) 2013, IIT Madras
   3 All rights reserved.
   4
   5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
   6
   7 *  Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
   8 *  Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
   9 *  Neither the name of IIT Madras  nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
  10
  11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  13
  14 Module name: Riscv_arithmetic_unit.
  15 author name: Neel Gala
  16 Email id:    neelgala@gmail.com
  17
  18 This module is the arithmetic execution unit for the RISC-V ISA. It is a 64-bit implementation, referred to as RV64.
  19 Instructions with a "W" suffix are RV64 instructions which ignore the upper 32 bits and operate on the lower 32 bits.
  20 The arithmetic unit is implemented as a single case statement where the instruction bits define the various operations to be executed.
  21
  22 This module contains single cycle MUL instruction execution.
  23
  24 */
  25
  26 package alu;
  27
  28 import defined_types::*;
  29 `include "defined_parameters.bsv"
  30 `include "decode.defines"
  31         (*noinline*)
  32         function Tuple7#(Execution_output, Bit#(`VADDR), Flush_type, Maybe#(Training_data#(`VADDR)),Maybe#(Bit#(`VADDR)), Trap_type, Bit#(`PERFMONITORS)) fn_alu(Bit#(4) fn, Bit#(64) op1, Bit#(64) op2, Bit#(64) immediate_value, Bit#(`VADDR) pc ,
  33                                                                                                                                                                                                                                 Instruction_type inst_type, Bit#(`VADDR) npc, Bit#(3) funct3, Access_type mem_access, Bit#(5) rd, Bit#(2) prediction,
  34                                                                                                                                                                                                                                 Bit#(`PERFMONITORS) perfmonitors
  35                                                                                                                                                                                                                                         `ifdef RV64 ,Bool word32 `endif );
     // fn_alu: computes the add/sub, compare, shift and logic result selected by fn,
     // resolves the target address of branches and jumps, decides whether a flush is
     // needed, and packages the result for the following stage.
     //
     // Arguments:
     //   fn              - 4-bit operation select, compared against the FN* macros.
     //                     fn[3] additionally enables operand inversion (subtract) and
     //                     sign fill on shifts; fn[0] inverts the compare result.
     //   op1, op2        - 64-bit operands.
     //   immediate_value - added to pc to form the branch/JAL target; also forwarded as
     //                     memory_data and sliced into the CSR rs1_addr/csr_address fields.
     //   pc              - pc of this instruction.
     //   inst_type       - instruction class (BRANCH, JAL, JALR, MEMORY, FENCE, FENCEI,
     //                     SYSTEM_INSTR, ...); selects address and output packaging.
     //   npc             - value the computed target is compared against; presumably the
     //                     predicted next pc (verify against caller).
     //   funct3          - forwarded to CSR requests; funct3[1:0] gives the memory transfer
     //                     size and ~funct3[2] the signextend field.
     //   mem_access      - memory access type; Atomic makes the output equal to op1.
     //   rd              - destination register index, used for the return-address-stack push.
     //   prediction      - 2-bit predictor state that is updated into the training data.
     //   perfmonitors    - performance-counter bit vector; branch bits are set here.
     //   word32          - (only with RV64 defined) operate on the lower 32 bits and
     //                     sign-extend the result.
     //
     // Returns tuple7(result, effective_address, flush, training_data, ras_push,
     //                exception, perfmonitors).
     //
     // NOTE(review): word32 is only declared as a parameter when RV64 is defined, yet the
     // body references it unconditionally; confirm that builds without RV64 are not expected.
  36 // TODO: use the pc of the previous stage for next-pc. This will save space in the FIFOs
  37 // But what if the instruction in the previous stage has not yet been enqueued (in cases of page/cache misses)
  38 // In this case you will have to wait until that instruction arrives before progressing. NEED TO THINK THIS THROUGH
  39                 /*========= Perform all the arithmetic ===== */
  40                 // ADD* ADDI* SUB*
                     // When fn[3] is set, op2 is inverted and 1 is added through fn[3], so the
                     // adder computes op1 - op2; otherwise it computes op1 + op2.
  41                 let inv_op2=(fn[3]==1)?~op2:op2;
  42                 let op1_xor_op2=op1^inv_op2;
  43                 let adder_output=op1+inv_op2+zeroExtend(fn[3]);
  44                 // SLT SLTU
                     // fn[0] inverts the selected comparison:
                     //   fn[3]==0 : equality test (inv_op2 is op2 here, so the xor is zero iff op1==op2)
                     //   fn[3]==1 : less-than taken from the subtraction. If the operand MSBs match,
                     //              use the MSB of the difference; if they differ, use op2's MSB
                     //              when fn[1]==1 and op1's MSB otherwise.
  45                 Bit#(1) compare_out=fn[0]^(
  46                                                         (fn[3]==0)?pack(op1_xor_op2==0):
  47                                                         (op1[64-1]==op2[64-1])?adder_output[64-1]:
  48                                                         (fn[1]==1)?op2[64-1]:op1[64-1]);
  49                 // SLL SRL SRA
                     // A single right shift serves all three operations: a left shift is done by
                     // reversing the bits, shifting right and reversing the bits again.
                     // For word32 the shift amount is limited to 5 bits and the upper 32 input bits
                     // are replaced by copies of (fn[3] & op1[31]).
  50                 Bit#(6) shift_amt={((!word32)?op2[5]:0),op2[4:0]};
  51                 Bit#(32) upper_bits=word32?signExtend(fn[3]&op1[31]):op1[63:32];
  52                 Bit#(64) shift_inright={upper_bits,op1[31:0]};
  53                 let shin = (fn==`FNSR || fn==`FNSRA)?shift_inright:reverseBits(shift_inright);
                     // t is one bit wider than the data; its top bit is (fn[3] & MSB of shin), so the
                     // signed (Int) right shift fills with the sign bit only when fn[3] is set.
  54                 Int#(TAdd#(64,1)) t=unpack({(fn[3]&shin[64-1]),shin});
  55                 Int#(64) shift_r=unpack(pack(t>>shift_amt)[64-1:0]);
  56                 let shift_l=reverseBits(pack(shift_r));
  57                 Bit#(64) shift_output=((fn==`FNSR || fn==`FNSRA)?pack(shift_r):0) |
  58                                                                                                 ((fn==`FNSL)?                             pack(shift_l):0);
  59                 // AND OR XOR
                     // OR is formed as (xor | and). This reuses op1_xor_op2, which is computed from
                     // inv_op2; it presumably relies on fn[3]==0 for the logic opcodes (the FN*
                     // values are defined elsewhere; TODO confirm).
  60                 let logic_output=       ((fn==`FNXOR || fn==`FNOR)?op1_xor_op2:0) |
  61                                                                 ((fn==`FNOR || fn==`FNAND)?op1&op2:0);
                     // Exactly the terms that do not apply to fn are zero, so OR-ing them acts as a mux.
  62                 let shift_logic=zeroExtend(pack(fn==`FNSEQ || fn==`FNSNE || fn >= `FNSLT)&compare_out) |
  63                                                          logic_output|shift_output;
  64                 Bit#(64) final_output = (fn==`FNADD || fn==`FNSUB)?adder_output:shift_logic;
                     // "W" operations: keep the lower 32 bits and sign-extend them to 64 bits.
  65                 if(word32)
  66                         final_output=signExtend(final_output[31:0]);
                     // Atomic memory operations pass op1 through unchanged (it becomes the address below).
  67                 if(inst_type==MEMORY && mem_access==Atomic) // TODO see if this can be avoided
  68                         final_output=op1;
  69                 /*============================================ */
  70                 /*====== generate the effective address to jump to ====== */
  71                 Bit#(`VADDR) branch_address=truncate(immediate_value)+pc;
  72                 Bit#(`VADDR) next_pc=pc+4;
  73                 Bit#(`VADDR) effective_address=0;
                     // Update the 2-bit predictor state: jumps force 'b11; otherwise bit 0 of the
                     // result (the branch-taken flag for BRANCH) increments the state up to 3 or
                     // decrements it down to 0.
  74                 Bit#(2) new_state=prediction;
  75                 if(inst_type==JAL || inst_type==JALR)
  76                         new_state='b11;
  77                 else if(final_output[0]==1)begin
  78                         if(new_state<3)
  79                                 new_state=new_state+1;
  80                 end
  81                 else begin
  82                         if(new_state>0)
  83                                 new_state=new_state-1;
  84                 end
  85                 Training_data#(`VADDR) bp_train = Training_data{pc:pc,branch_address:branch_address,state:new_state};
  86                 Maybe#(Training_data#(`VADDR)) training_data=tagged Invalid;
  87                 Maybe#(Bit#(`VADDR)) ras_push=tagged Invalid;
  88
                     // Performance counter: conditional branch taken.
  89                 if(inst_type==BRANCH && final_output[0]==1)
  90                         perfmonitors[`COND_BRANCH_TAKEN]=1;
  91
                     // Target selection: taken BRANCH and JAL use pc+immediate; FENCEI and a
                     // not-taken BRANCH use pc+4; everything else (including JALR) uses the ALU result.
                     // NOTE(review): for JALR the target is the raw ALU result; the RISC-V spec clears
                     // bit 0 of the JALR target. Confirm whether that is handled elsewhere.
  92                 if((inst_type==BRANCH && final_output[0]==1) || inst_type==JAL)
  93                         effective_address=branch_address;
  94                 else if(inst_type==FENCEI || (inst_type==BRANCH && final_output[0]==0))
  95                         effective_address=next_pc;
  96                 else begin
  97                         effective_address=truncate(final_output);
  98                         bp_train.branch_address=truncate(final_output);
  99                 end
                     // JAL/JALR produce pc+4 as their register result (the link value).
 100                 if(inst_type==JAL || inst_type==JALR)
 101                         final_output=signExtend(next_pc);
                     // Simulation builds only: BRANCH reports a zero result.
 102                 `ifdef simulate
 103                         if(inst_type==BRANCH)
 104                                 final_output=0;
 105                 `endif
 106                 /*======================================================== */
 107                 /*==== Generate flush if prediction was wrong or FENCEI ========== */
                     // NOTE(review): (rd != 'b00101 || rd != 'b00001) is true for every rd, so all
                     // JALR instructions currently produce training data. If link-register
                     // destinations were meant to be excluded the operator would have to be &&;
                     // confirm the intended behaviour before changing it.
 108                 if(inst_type==BRANCH || inst_type==JAL || ((rd != 'b00101 || rd!='b00001) && inst_type==JALR))
 109                         training_data=tagged Valid bp_train;
 110                 Flush_type flush=None;
                     // A control transfer whose computed target differs from npc requests an
                     // AccessFlush (and counts a misprediction for BRANCH); FENCEI requests a Fence flush.
 111                 if((inst_type==BRANCH || inst_type==JAL || inst_type==JALR) && effective_address!=npc)begin
 112                         if(inst_type==BRANCH)
 113                                 perfmonitors[`COND_BRANCH_MISPREDICTED]=1;
 114                         flush=AccessFlush;
 115                 end
 116                 else if(inst_type==FENCEI)
 117                         flush=Fence;
                     // Jumps that write register 1 or 5 ('b00?01) push pc+4 for the return-address stack.
 118                 if((inst_type==JAL||inst_type==JALR) &&& rd matches 'b00?01) // TODO put on RAS only if rd = ra
 119                         ras_push=tagged Valid next_pc;
 120                 /*================================================================ */
             // Jump targets with bit 1 set raise an instruction-address-misaligned exception.
             // Only JAL and JALR are checked here; bit 0 is not examined.
 121         Trap_type exception=tagged None;
 122         if((inst_type==JALR || inst_type==JAL) && effective_address[1]!=0)
 123                 exception=tagged Exception Inst_addr_misaligned;
             // Package the output according to the instruction class:
             //   MEMORY/FENCE/FENCEI -> Memout (address is the ALU result, data is immediate_value)
             //   SYSTEM_INSTR        -> CSRInputs
             //   anything else       -> Arithout with fflags cleared
 124         Execution_output result;
 125                 if(inst_type==MEMORY || inst_type==FENCE || inst_type == FENCEI)begin
 126                         result= tagged MEMORY (Memout{
 127                                 address:final_output,
 128                                 memory_data:immediate_value,
 129                                 transfer_size:zeroExtend(funct3[1:0]),
 130                                 signextend:~funct3[2],
 131                                 mem_type:(inst_type==FENCE || inst_type==FENCEI)?Fence:mem_access
 132                                 `ifdef atomic ,atomic_op:{pack(word32),fn} `endif       });
 133                 end
 134                 else if(inst_type==SYSTEM_INSTR)begin
 135                         result=tagged SYSTEM (CSRInputs{rs1:op1,rs2:op2,rs1_addr:immediate_value[16:12],funct3:funct3,csr_address:immediate_value[11:0]});
 136                 end
 137                 else
 138                         result=tagged RESULT (Arithout{aluresult:final_output,fflags:0});
 139                 return tuple7(result,effective_address,flush, training_data,ras_push,exception,perfmonitors);
 140         endfunction
 141
 142 //      module mkTb(Empty);
 143 //
 144 //      rule test_alu;
 145 //              Bit#(64) op1='h8000000000004123;
 146 //              Int#(64) in1=unpack(op1);
 147 //              Bit#(64) op2='h8000000000004123;
 148 //              let {x,ea,flush}<-fn_alu(`FNSNE,op1,op2,'d0,'d0,BRANCH,'h800,'d3,Load,False);
 149 //              //$display("Output is: :%h Excepted: %h",x,(op1!=op2));
 150 //              $finish(0);
 151 //      endrule
 152 //      endmodule
 153
 154 endpackage