fix ALU with XLEN=32, carry and overflow
[soc.git] / src / soc / fu / alu / main_stage.py
1 # This stage does most of the work of executing the arithmetic
2 # instructions: additions, compares and sign-extension, as well as
3 # carry and overflow generation. This module, however, must not gate
4 # the carry or overflow outputs: that is the job of the
5 # output stage.
6
7 # License: LGPLv3+
8 # Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
9 # Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
10 # (michael: note that there are multiple copyright holders)
11
12 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
13 from nmutil.pipemodbase import PipeModBase
14 from nmutil.extend import exts, extz
15 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
16 from ieee754.part.partsig import SimdSignal
17 from openpower.decoder.power_enums import MicrOp
18
19 from openpower.decoder.power_fields import DecodeFields
20 from openpower.decoder.power_fieldsn import SignalBitRange
21
22
23 # microwatt calc_ov function.
def calc_ov(msb_a, msb_b, ca, msb_r):
    """Overflow flag, following the microwatt calc_ov function.

    The result is set when the carry differs from the result's MSB
    while the two operand MSBs are equal to each other.

    msb_a, msb_b: most-significant bits of the two operands
    ca:           carry bit
    msb_r:        most-significant bit of the result
    """
    carry_differs = ca ^ msb_r
    operands_agree = ~(msb_a ^ msb_b)
    return carry_differs & operands_agree
26
27
class ALUMainStage(PipeModBase):
    """Main combinatorial stage of the ALU pipeline.

    Handles OP_CMP, OP_ADD, OP_EXTS and OP_CMPEQB, producing the result,
    CR0, XER.CA/CA32 and XER.OV/OV32.  Carry and overflow are generated
    here but are not gated: that is left to the output stage.
    """

    def __init__(self, pspec):
        """Set up the stage and the instruction-field decoder.

        pspec: pipeline specification; its XLEN attribute is read
               in elaborate() to select the register width.
        """
        super().__init__(pspec, "main")
        # field decoder over the raw instruction, used to extract FormX.L
        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
        self.fields.create_specs()

    def ispec(self):
        """Return the pipeline stage input format."""
        return ALUInputData(self.pspec)

    def ospec(self):
        """Return the pipeline stage output format."""
        return ALUOutputData(self.pspec)

    def elaborate(self, platform):
        """Build and return the Module implementing the ALU operations."""
        XLEN = self.pspec.XLEN
        m = Module()
        comb = m.d.comb

        # convenience variables
        cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
        xer_so_i, ov_o = self.i.xer_so, self.o.xer_ov
        a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op

        # get L-field for OP_CMP
        x_fields = self.fields.FormX
        L = x_fields.L[0]

        # check if op is 32-bit (only ever driven for OP_CMP, from ~L)
        is_32bit = Signal(reset_less=True)

        with m.If(op.insn_type == MicrOp.OP_CMP):
            comb += is_32bit.eq(~L)

        # little trick: do the add using only one add (not 2)
        # LSB: carry-in [0].  op/result: [1:-1].  MSB: carry-out [-1]
        add_a = Signal(a.width + 2, reset_less=True)
        add_b = Signal(a.width + 2, reset_less=True)
        add_o = Signal(a.width + 2, reset_less=True)

        a_i = Signal.like(a)
        b_i = Signal.like(b)
        with m.If(op.insn_type == MicrOp.OP_CMP):  # another temporary hack
            comb += a_i.eq(a)  # reaaaally need to move CMP
            comb += b_i.eq(b)  # into trap pipeline
        # NOTE(review): is_32bit is only driven for OP_CMP, which the
        # branch above already catches, so this Elif looks unreachable
        # as written - confirm before relying on it.
        with m.Elif(is_32bit):
            with m.If(op.is_signed):
                comb += a_i.eq(exts(a, 32, XLEN))
                comb += b_i.eq(exts(b, 32, XLEN))
            with m.Else():
                comb += a_i.eq(extz(a, 32, XLEN))
                comb += b_i.eq(extz(b, 32, XLEN))
        with m.Else():
            comb += a_i.eq(a)
            comb += b_i.eq(b)

        with m.If((op.insn_type == MicrOp.OP_ADD) |
                  (op.insn_type == MicrOp.OP_CMP)):
            # in bit 0, 1+carry_in creates carry into bit 1 and above
            comb += add_a.eq(Cat(cry_i[0], a_i, Const(0, 1)))
            comb += add_b.eq(Cat(Const(1, 1), b_i, Const(0, 1)))
            comb += add_o.eq(add_a + add_b)

        ##########################
        # main switch-statement for handling arithmetic operations

        with m.Switch(op.insn_type):

            ###################
            #### CMP, CMPL v3.0B p85-86

            with m.Case(MicrOp.OP_CMP):
                a_n = Signal(XLEN)  # temporary - inverted a
                # tval: [0:2] are used for the unsigned compare result,
                # [3:5] for the signed one, [2] is the "equal" flag
                tval = Signal(5)
                a_lt = Signal()
                carry_32 = Signal()
                carry_64 = Signal()
                zerolo = Signal()
                zerohi = Signal()
                msb_a = Signal()
                msb_b = Signal()

                # this is supposed to be inverted (b-a, not a-b)
                comb += a_n.eq(~a)  # sigh a gets inverted
                if XLEN == 64:
                    # recover the carry out of bit 31 from the sum bit
                    comb += carry_32.eq(add_o[33] ^ a[32] ^ b[32])
                else:
                    comb += carry_32.eq(add_o[XLEN+1])
                comb += carry_64.eq(add_o[XLEN+1])

                # equality of the low 32 bits and of the remaining bits
                comb += zerolo.eq(~((a_n[0:32] ^ b[0:32]).bool()))
                comb += zerohi.eq(~((a_n[32:XLEN] ^ b[32:XLEN]).bool()))

                with m.If(zerolo & (is_32bit | zerohi)):
                    # values are equal
                    comb += tval[2].eq(1)
                with m.Else():
                    comb += msb_a.eq(Mux(is_32bit, a_n[31], a_n[XLEN-1]))
                    comb += msb_b.eq(Mux(is_32bit, b[31], b[XLEN-1]))
                    C0 = Const(0, 1)
                    with m.If(msb_a != msb_b):
                        # Subtraction might overflow, but
                        # comparison is clear from MSB difference.
                        # for signed, 0 is greater; for unsigned, 1 is greater
                        comb += tval.eq(Cat(msb_a, msb_b, C0, msb_b, msb_a))
                    with m.Else():
                        # Subtraction cannot overflow since MSBs are equal.
                        # carry = 1 indicates RA is smaller (signed or unsigned)
                        comb += a_lt.eq(Mux(is_32bit, carry_32, carry_64))
                        comb += tval.eq(Cat(~a_lt, a_lt, C0, ~a_lt, a_lt))
                comb += cr0.data[0:2].eq(Cat(xer_so_i[0], tval[2]))
                with m.If(op.is_signed):
                    comb += cr0.data[2:4].eq(tval[3:5])
                with m.Else():
                    comb += cr0.data[2:4].eq(tval[0:2])
                comb += cr0.ok.eq(1)

            ###################
            #### add v3.0B p67, p69-72

            with m.Case(MicrOp.OP_ADD):
                # bit 0 is not part of the result, top bit is the carry-out
                comb += o.data.eq(add_o[1:-1])
                comb += o.ok.eq(1)  # output register

                # see microwatt OP_ADD code
                # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
                ca = Signal(2, reset_less=True)
                comb += ca[0].eq(add_o[-1])  # XER.CA
                # XER.CA32
                if XLEN == 64:
                    comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32]))
                else:
                    comb += ca[1].eq(add_o[-1])
                comb += cry_o.data.eq(ca)
                comb += cry_o.ok.eq(1)
                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
                ov = Signal(2, reset_less=True)
                comb += ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2]))
                if XLEN == 64:
                    comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1],
                                             add_o[32]))
                else:
                    # narrower than 64 bit: ov[1] mirrors ov[0]
                    comb += ov[1].eq(calc_ov(a_i[-1], b_i[-1], ca[0],
                                             add_o[-2]))
                comb += ov_o.data.eq(ov)
                comb += ov_o.ok.eq(1)

            ###################
            #### exts (sign-extend) v3.0B p96, p99

            with m.Case(MicrOp.OP_EXTS):
                with m.If(op.data_len == 1):
                    comb += o.data.eq(exts(a, 8, XLEN))
                with m.If(op.data_len == 2):
                    comb += o.data.eq(exts(a, 16, XLEN))
                with m.If(op.data_len == 4):
                    comb += o.data.eq(exts(a, 32, XLEN))
                comb += o.ok.eq(1)  # output register

            ###################
            #### cmpeqb v3.0B p88

            with m.Case(MicrOp.OP_CMPEQB):
                # Only compare against bytes that exist in b.  Slicing
                # past the end of b yields a zero-width value, which
                # compares as 0 and would give a false match whenever
                # src1 is zero (e.g. with XLEN=32 and a fixed count of 8).
                nbytes = XLEN // 8
                eqs = Signal(nbytes, reset_less=True)
                src1 = Signal(8, reset_less=True)
                comb += src1.eq(a[0:8])
                for i in range(nbytes):
                    comb += eqs[i].eq(src1 == b[8*i:8*(i+1)])
                comb += o.data[0].eq(eqs.any())
                comb += o.ok.eq(0)  # use o.data but do *not* actually output
                comb += cr0.data.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
                comb += cr0.ok.eq(1)

        ###### sticky overflow and context, both pass-through #####

        comb += self.o.xer_so.data.eq(xer_so_i)
        comb += self.o.ctx.eq(self.i.ctx)

        return m