4704e4ff6cbffd9889f32c2b2785cc9c02883407
[soc.git] / src / soc / fu / alu / main_stage.py
1 # This stage is intended to do most of the work of executing the Arithmetic
2 # instructions. This would be like the additions, compares, and sign-extension
3 # as well as carry and overflow generation. This module
4 # however should not gate the carry or overflow, that's up to the
5 # output stage
6
7 # License: LGPLv3+
8 # Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
9 # Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
10 # (michael: note that there are multiple copyright holders)
11
12 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
13 from nmutil.pipemodbase import PipeModBase
14 from nmutil.extend import exts, extz
15 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
16 from ieee754.part.partsig import SimdSignal
17 from openpower.decoder.power_enums import MicrOp
18
19 from openpower.decoder.power_fields import DecodeFields
20 from openpower.decoder.power_fieldsn import SignalBitRange
21
22
23 # microwatt calc_ov function.
def calc_ov(msb_a, msb_b, ca, msb_r):
    """Return the overflow flag for an addition.

    msb_a, msb_b: most-significant bits of the two operands.
    ca:           carry flag associated with the addition.
    msb_r:        most-significant bit of the result.

    The flag is raised when the carry differs from the result MSB while
    the two operand MSBs are the same.  Only the bitwise operators
    ^, & and ~ are applied to the arguments.
    """
    operand_msbs_differ = msb_a ^ msb_b
    carry_differs_from_result = ca ^ msb_r
    return carry_differs_from_result & ~operand_msbs_differ
26
27
class ALUMainStage(PipeModBase):
    """Main (combinatorial) stage of the ALU pipeline.

    Implements the OP_CMP, OP_ADD, OP_EXTS and OP_CMPEQB micro-ops and
    drives the result register (o), CR0 (cr0), carry (xer_ca) and
    overflow (xer_ov) outputs accordingly.  The XER.SO data and the
    pipeline context are copied from input to output unchanged.
    """

    def __init__(self, pspec):
        super().__init__(pspec, "main")
        # instruction-field decoder over the incoming instruction word:
        # elaborate() uses it to pick out the X-Form "L" bit for OP_CMP
        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
        self.fields.create_specs()

    def ispec(self):
        """Return the input data format of this pipeline stage."""
        return ALUInputData(self.pspec)

    def ospec(self):
        """Return the output data format of this pipeline stage."""
        return ALUOutputData(self.pspec)

    def elaborate(self, platform):
        """Build and return the Module holding this stage's logic.

        Everything is added to the combinatorial domain.  The platform
        argument is not used.

        NOTE: nmigen names each Signal after the variable it is first
        assigned to, so renaming the Signal-holding locals below also
        renames the signals in generated output.
        """
        XLEN = self.pspec.XLEN
        m = Module()
        comb = m.d.comb

        # convenience variables
        cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
        xer_so_i, ov_o = self.i.xer_so, self.o.xer_ov
        a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op

        # op-type tests that are needed by more than one condition below
        # (plain expressions, not Signals: built once and shared)
        is_cmp = op.insn_type == MicrOp.OP_CMP
        is_add = op.insn_type == MicrOp.OP_ADD

        # get L-field for OP_CMP
        L = self.fields.FormX.L[0]

        # 32-bit flag.  It is only ever driven for OP_CMP (from the
        # inverted L field); for every other op it keeps its reset value.
        is_32bit = Signal(reset_less=True)

        with m.If(is_cmp):
            comb += is_32bit.eq(~L)

        # little trick: do the add using only one add (not 2)
        # LSB: carry-in [0].  op/result: [1:-1].  MSB: carry-out [-1]
        add_a = Signal(a.width + 2, reset_less=True)
        add_b = Signal(a.width + 2, reset_less=True)
        add_o = Signal(a.width + 2, reset_less=True)

        # operands as fed to the adder.
        # NOTE(review): is_32bit is only driven when the op is OP_CMP,
        # and OP_CMP is caught by the first branch, so the Elif(is_32bit)
        # branch looks unreachable as the code stands.  Left in place
        # because the surrounding comments call this a temporary hack.
        a_i = Signal.like(a)
        b_i = Signal.like(b)
        with m.If(is_cmp):                 # another temporary hack
            comb += a_i.eq(a)              # reaaaally need to move CMP
            comb += b_i.eq(b)              # into trap pipeline
        with m.Elif(is_32bit):
            with m.If(op.is_signed):
                comb += a_i.eq(exts(a, 32, XLEN))
                comb += b_i.eq(exts(b, 32, XLEN))
            with m.Else():
                comb += a_i.eq(extz(a, 32, XLEN))
                comb += b_i.eq(extz(b, 32, XLEN))
        with m.Else():
            comb += a_i.eq(a)
            comb += b_i.eq(b)

        with m.If(is_add | is_cmp):
            # in bit 0, 1+carry_in creates carry into bit 1 and above
            comb += add_a.eq(Cat(cry_i[0], a_i, Const(0, 1)))
            comb += add_b.eq(Cat(Const(1, 1), b_i, Const(0, 1)))
            comb += add_o.eq(add_a + add_b)

        ##########################
        # main switch-statement for handling arithmetic operations
        #
        # NOTE(review): bit positions 31, 32 and 33 are hard-coded in the
        # CMP and ADD cases, which presumably assumes operands wider
        # than 32 bits - TODO confirm what is intended for smaller XLEN.

        with m.Switch(op.insn_type):

            ###################
            #### CMP, CMPL v3.0B p85-86

            with m.Case(MicrOp.OP_CMP):
                a_n = Signal(XLEN)  # temporary - inverted a
                # candidate compare flags: bits [0:2] are used for the
                # unsigned result, bit [2] flags equality, bits [3:5]
                # are used for the signed result
                tval = Signal(5)
                a_lt = Signal()
                carry_32 = Signal()
                carry_64 = Signal()
                zerolo = Signal()
                zerohi = Signal()
                msb_a = Signal()
                msb_b = Signal()

                # this is supposed to be inverted (b-a, not a-b)
                comb += a_n.eq(~a)  # sigh a gets inverted
                # add_o is offset by one (bit 0 is the carry-in slot), so
                # add_o[33] is the sum bit for operand bit 32: XORing out
                # both operand bits leaves the carry out of the low 32
                comb += carry_32.eq(add_o[33] ^ a[32] ^ b[32])
                comb += carry_64.eq(add_o[XLEN+1])

                # equality tests on the low and high parts of a_n and b
                comb += zerolo.eq(~((a_n[0:32] ^ b[0:32]).bool()))
                comb += zerohi.eq(~((a_n[32:XLEN] ^ b[32:XLEN]).bool()))

                with m.If(zerolo & (is_32bit | zerohi)):
                    # values are equal
                    comb += tval[2].eq(1)
                with m.Else():
                    comb += msb_a.eq(Mux(is_32bit, a_n[31], a_n[XLEN-1]))
                    comb += msb_b.eq(Mux(is_32bit, b[31], b[XLEN-1]))
                    C0 = Const(0, 1)
                    with m.If(msb_a != msb_b):
                        # Subtraction might overflow, but
                        # comparison is clear from MSB difference.
                        # for signed, 0 is greater; for unsigned, 1 is greater
                        comb += tval.eq(Cat(msb_a, msb_b, C0, msb_b, msb_a))
                    with m.Else():
                        # Subtraction cannot overflow since MSBs are equal.
                        # carry = 1 indicates RA is smaller (signed or unsigned)
                        comb += a_lt.eq(Mux(is_32bit, carry_32, carry_64))
                        comb += tval.eq(Cat(~a_lt, a_lt, C0, ~a_lt, a_lt))

                # cr0.data[0] takes bit 0 of XER.SO, cr0.data[1] the
                # equality flag; cr0.data[2:4] takes the signed or the
                # unsigned pair out of tval
                comb += cr0.data[0:2].eq(Cat(xer_so_i[0], tval[2]))
                with m.If(op.is_signed):
                    comb += cr0.data[2:4].eq(tval[3:5])
                with m.Else():
                    comb += cr0.data[2:4].eq(tval[0:2])
                comb += cr0.ok.eq(1)

            ###################
            #### add v3.0B p67, p69-72

            with m.Case(MicrOp.OP_ADD):
                # bit 0 is not part of the result, top bit is the carry-out
                comb += o.data.eq(add_o[1:-1])
                comb += o.ok.eq(1)  # output register

                # see microwatt OP_ADD code
                # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
                ca = Signal(2, reset_less=True)
                comb += ca[0].eq(add_o[-1])                        # XER.CA
                comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32]))  # XER.CA32
                comb += cry_o.data.eq(ca)
                comb += cry_o.ok.eq(1)
                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
                ov = Signal(2, reset_less=True)
                comb += ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2]))
                comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], add_o[32]))
                comb += ov_o.data.eq(ov)
                comb += ov_o.ok.eq(1)

            ###################
            #### exts (sign-extend) v3.0B p96, p99

            with m.Case(MicrOp.OP_EXTS):
                # a data_len of 1, 2 or 4 selects sign-extension from the
                # low 8, 16 or 32 bits of a; for any other data_len the
                # output data is left undriven here
                for nbytes in (1, 2, 4):
                    with m.If(op.data_len == nbytes):
                        comb += o.data.eq(exts(a, nbytes * 8, XLEN))
                comb += o.ok.eq(1)  # output register

            ###################
            #### cmpeqb v3.0B p88

            with m.Case(MicrOp.OP_CMPEQB):
                eqs = Signal(8, reset_less=True)
                src1 = Signal(8, reset_less=True)
                comb += src1.eq(a[0:8])
                # compare the low byte of a against each of 8 bytes of b
                for i in range(8):
                    comb += eqs[i].eq(src1 == b[8*i:8*(i+1)])
                any_eq = eqs.any()  # set if at least one byte matched
                comb += o.data[0].eq(any_eq)
                comb += o.ok.eq(0)  # use o.data but do *not* actually output
                # the match flag lands in cr0.data[2], other bits are zero
                comb += cr0.data.eq(Cat(Const(0, 2), any_eq, Const(0, 1)))
                comb += cr0.ok.eq(1)

        ###### sticky overflow and context, both pass-through #####

        comb += self.o.xer_so.data.eq(xer_so_i)
        comb += self.o.ctx.eq(self.i.ctx)

        return m