src/soc/fu/alu/main_stage.py

   1 # This stage is intended to do most of the work of executing the Arithmetic
   2 # instructions. This would be like the additions, compares, and sign-extension
   3 # as well as carry and overflow generation. This module
   4 # however should not gate the carry or overflow, that's up to the
   5 # output stage
   6
   7 # License: LGPLv3+
   8 # Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   9 # Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
  10 # (michael: note that there are multiple copyright holders)
  11
  12 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
  13 from nmutil.pipemodbase import PipeModBase
  14 from nmutil.extend import exts, extz
  15 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
  16 from ieee754.part.partsig import SimdSignal
  17 from openpower.decoder.power_enums import MicrOp
  18
  19 from openpower.decoder.power_fields import DecodeFields
  20 from openpower.decoder.power_fieldsn import SignalBitRange
  21
  22
  23 # microwatt calc_ov function.
  24 def calc_ov(msb_a, msb_b, ca, msb_r):
  25     return (ca ^ msb_r) & ~(msb_a ^ msb_b)
  26
  27
class ALUMainStage(PipeModBase):
    """Main stage of the ALU pipeline.

    Implements the micro-ops OP_CMP, OP_ADD, OP_EXTS and OP_CMPEQB,
    selected by ``op.insn_type``.  Every assignment made in
    ``elaborate`` goes to the combinatorial domain.  Carry and overflow
    are produced here without gating (per the file header, gating is the
    job of the output stage).
    """
    def __init__(self, pspec):
        """Set up the stage and the instruction-field decoder.

        pspec: pipeline specification, handed to PipeModBase together
               with the stage name "main".
        """
        super().__init__(pspec, "main")
        # field decoder over the instruction word: elaborate() uses it to
        # pick the L field (Form X) out of op.insn
        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
        self.fields.create_specs()

    def ispec(self):
        """Return a new ALUInputData: the input format of this stage."""
        return ALUInputData(self.pspec) # defines pipeline stage input format

    def ospec(self):
        """Return a new ALUOutputData: the output format of this stage."""
        return ALUOutputData(self.pspec) # defines pipeline stage output format

    def elaborate(self, platform):
        """Build and return the Module holding the stage's logic.

        platform: not used by this method.

        Reads a, b, xer_ca, xer_so and ctx.op from self.i; drives o, cr0,
        xer_ca, xer_ov, xer_so and ctx on self.o.
        """
        XLEN = self.pspec.XLEN
        m = Module()
        comb = m.d.comb

        # convenience variables
        cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
        xer_so_i, ov_o = self.i.xer_so, self.o.xer_ov
        a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op

        # get L-field for OP_CMP
        x_fields = self.fields.FormX
        L = x_fields.L[0]

        # flag: operation is 32-bit.  Only driven for OP_CMP (from the
        # inverted L field); no other micro-op assigns it in this method.
        is_32bit = Signal(reset_less=True)

        with m.If(op.insn_type == MicrOp.OP_CMP):
            comb += is_32bit.eq(~L)

        # little trick: do the add using only one add (not 2)
        # LSB: carry-in [0].  op/result: [1:-1].  MSB: carry-out [-1]
        add_a = Signal(a.width + 2, reset_less=True)
        add_b = Signal(a.width + 2, reset_less=True)
        add_o = Signal(a.width + 2, reset_less=True)

        # operands as actually fed to the adder
        a_i = Signal.like(a)
        b_i = Signal.like(b)
        with m.If(op.insn_type == MicrOp.OP_CMP): # another temporary hack
            comb += a_i.eq(a)                     # reaaaally need to move CMP
            comb += b_i.eq(b)                     # into trap pipeline
        # NOTE(review): is_32bit is only driven under OP_CMP, which the If
        # above already catches, so this Elif looks unreachable as written
        # - confirm before relying on the 32-bit extension below.
        with m.Elif(is_32bit):
            with m.If(op.is_signed):
                # exts: presumably sign-extends the low 32 bits to XLEN
                comb += a_i.eq(exts(a, 32, XLEN))
                comb += b_i.eq(exts(b, 32, XLEN))
            with m.Else():
                # extz: presumably zero-extends the low 32 bits to XLEN
                comb += a_i.eq(extz(a, 32, XLEN))
                comb += b_i.eq(extz(b, 32, XLEN))
        with m.Else():
            comb += a_i.eq(a)
            comb += b_i.eq(b)

        # the adder is shared by OP_ADD and OP_CMP and is only driven for
        # those two micro-ops
        with m.If((op.insn_type == MicrOp.OP_ADD) |
                  (op.insn_type == MicrOp.OP_CMP)):
            # in bit 0, 1+carry_in creates carry into bit 1 and above
            comb += add_a.eq(Cat(cry_i[0], a_i, Const(0, 1)))
            comb += add_b.eq(Cat(Const(1, 1), b_i, Const(0, 1)))
            comb += add_o.eq(add_a + add_b)

        ##########################
        # main switch-statement for handling arithmetic operations

        with m.Switch(op.insn_type):

            ###################
            #### CMP, CMPL v3.0B p85-86

            with m.Case(MicrOp.OP_CMP):
                a_n = Signal(XLEN) # temporary - inverted a
                tval = Signal(5)   # candidate result bits, selected below
                a_lt = Signal()
                carry_32 = Signal()
                carry_64 = Signal()
                zerolo = Signal()  # low 32 bits of a_n and b are identical
                zerohi = Signal()  # bits 32 and up of a_n and b are identical
                msb_a = Signal()
                msb_b = Signal()
                newcrf = Signal(4) # declared but never used in this method

                # this is supposed to be inverted (b-a, not a-b)
                # (presumably `a` arrives already inverted from an earlier
                # stage, hence the re-inversion here - confirm)
                comb += a_n.eq(~a) # sigh a gets inverted
                # add_o is offset by one bit, so add_o[33] is sum bit 32;
                # XORing with both operands' bit 32 leaves the carry that
                # came out of bit 31
                comb += carry_32.eq(add_o[33] ^ a[32] ^ b[32])
                # top bit of the adder output is the full-width carry-out
                comb += carry_64.eq(add_o[XLEN+1])

                comb += zerolo.eq(~((a_n[0:32] ^ b[0:32]).bool()))
                comb += zerohi.eq(~((a_n[32:XLEN] ^ b[32:XLEN]).bool()))

                # in 32-bit mode the upper half does not take part in the
                # equality test
                with m.If(zerolo & (is_32bit | zerohi)):
                    # values are equal
                    comb += tval[2].eq(1)
                with m.Else():
                    # sign bit position depends on the compare width
                    comb += msb_a.eq(Mux(is_32bit, a_n[31], a_n[XLEN-1]))
                    comb += msb_b.eq(Mux(is_32bit, b[31], b[XLEN-1]))
                    C0 = Const(0, 1)
                    with m.If(msb_a != msb_b):
                        # Subtraction might overflow, but
                        # comparison is clear from MSB difference.
                        # for signed, 0 is greater; for unsigned, 1 is greater
                        comb += tval.eq(Cat(msb_a, msb_b, C0, msb_b, msb_a))
                    with m.Else():
                        # Subtraction cannot overflow since MSBs are equal.
                        # carry = 1 indicates RA is smaller (signed or unsigned)
                        comb += a_lt.eq(Mux(is_32bit, carry_32, carry_64))
                        comb += tval.eq(Cat(~a_lt, a_lt, C0, ~a_lt, a_lt))
                # cr0.data[0] takes bit 0 of the incoming XER.SO, cr0.data[1]
                # the "equal" bit
                comb += cr0.data[0:2].eq(Cat(xer_so_i[0], tval[2]))
                # upper two cr0 bits: tval[3:5] for a signed compare,
                # tval[0:2] for an unsigned one
                with m.If(op.is_signed):
                    comb += cr0.data[2:4].eq(tval[3:5])
                with m.Else():
                    comb += cr0.data[2:4].eq(tval[0:2])
                comb += cr0.ok.eq(1)

            ###################
            #### add v3.0B p67, p69-72

            with m.Case(MicrOp.OP_ADD):
                # bit 0 is not part of the result, top bit is the carry-out
                comb += o.data.eq(add_o[1:-1])
                comb += o.ok.eq(1) # output register

                # see microwatt OP_ADD code
                # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
                ca = Signal(2, reset_less=True)
                comb += ca[0].eq(add_o[-1])                   # XER.CA
                # sum bit 32 XOR both operands' bit 32 = carry out of bit 31
                comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])) # XER.CA32
                comb += cry_o.data.eq(ca)
                comb += cry_o.ok.eq(1)
                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
                # (add_o[-2] is the result MSB, add_o[32] is result bit 31)
                ov = Signal(2, reset_less=True)
                comb += ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2]))
                comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], add_o[32]))
                comb += ov_o.data.eq(ov)
                comb += ov_o.ok.eq(1)

            ###################
            #### exts (sign-extend) v3.0B p96, p99

            with m.Case(MicrOp.OP_EXTS):
                # op.data_len selects the source width: 1, 2 or 4 selects
                # the low 8, 16 or 32 bits of a.  No other data_len value
                # assigns o.data here, but o.ok is still set.
                with m.If(op.data_len == 1):
                    comb += o.data.eq(exts(a, 8, XLEN))
                with m.If(op.data_len == 2):
                    comb += o.data.eq(exts(a, 16, XLEN))
                with m.If(op.data_len == 4):
                    comb += o.data.eq(exts(a, 32, XLEN))
                comb += o.ok.eq(1) # output register

            ###################
            #### cmpeqb v3.0B p88

            with m.Case(MicrOp.OP_CMPEQB):
                eqs = Signal(8, reset_less=True)
                src1 = Signal(8, reset_less=True)
                # compare the low byte of a against each of the 8 bytes
                # in bits 0..63 of b
                comb += src1.eq(a[0:8])
                for i in range(8):
                    comb += eqs[i].eq(src1 == b[8*i:8*(i+1)])
                comb += o.data[0].eq(eqs.any())
                comb += o.ok.eq(0) # use o.data but do *not* actually output
                # only cr0.data[2] carries the "some byte matched" flag;
                # the other three bits are zero
                comb += cr0.data.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
                comb += cr0.ok.eq(1)

        ###### sticky overflow and context, both pass-through #####

        comb += self.o.xer_so.data.eq(xer_so_i)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
 193
 194         return m