src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat
   6 from nmigen.cli import main, verilog
   7
   8 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase
   9
  10 class FPState(FPBase):
  11     def __init__(self, state_from, state_to):
  12         self.state_from = state_from
  13         self.state_to = state_to
  14
  15     def set_inputs(self, inputs):
  16         self.inputs = inputs
  17         for k,v in inputs.items():
  18             setattr(self, k, v)
  19
  20     def set_outputs(self, outputs):
  21         self.outputs = outputs
  22         for k,v in outputs.items():
  23             setattr(self, k, v)
  24
  25
  26 class FPGetOpA(FPState):
  27
  28     def action(self, m):
  29         self.get_op(m, self.in_a, self.a, self.state_to)
  30
  31
  32 class FPGetOpB(FPState):
  33
  34     def action(self, m):
  35         self.get_op(m, self.in_b, self.b, self.state_to)
  36
  37
  38 class FPADD(FPBase):
  39
  40     def __init__(self, width, single_cycle=False):
  41         FPBase.__init__(self)
  42         self.width = width
  43         self.single_cycle = single_cycle
  44
  45         self.in_a  = FPOp(width)
  46         self.in_b  = FPOp(width)
  47         self.out_z = FPOp(width)
  48
  49     def get_fragment(self, platform=None):
  50         """ creates the HDL code-fragment for FPAdd
  51         """
  52         m = Module()
  53
  54         # Latches
  55         a = FPNumIn(self.in_a, self.width)
  56         b = FPNumIn(self.in_b, self.width)
  57         z = FPNumOut(self.width, False)
  58
  59         m.submodules.fpnum_a = a
  60         m.submodules.fpnum_b = b
  61         m.submodules.fpnum_z = z
  62
  63         w = z.m_width + 4
  64         tot = Signal(w, reset_less=True) # sticky/round/guard, {mantissa} result, 1 overflow
  65
  66         of = Overflow()
  67         m.submodules.overflow = of
  68
  69         geta = FPGetOpA("get_a", "get_b")
  70         geta.set_inputs({"in_a": self.in_a})
  71         geta.set_outputs({"a": a})
  72         m.d.comb += a.v.eq(self.in_a.v) # links in_a to a
  73
  74         getb = FPGetOpB("get_b", "special_cases")
  75         getb.set_inputs({"in_b": self.in_b})
  76         getb.set_outputs({"b": b})
  77         m.d.comb += b.v.eq(self.in_b.v) # links in_b to b
  78
  79         with m.FSM() as fsm:
  80
  81             # ******
  82             # gets operand a
  83
  84             with m.State("get_a"):
  85                 geta.action(m)
  86
  87             # ******
  88             # gets operand b
  89
  90             with m.State("get_b"):
  91                 #self.get_op(m, self.in_b, b, "special_cases")
  92                 getb.action(m)
  93
  94             # ******
  95             # special cases: NaNs, infs, zeros, denormalised
  96             # NOTE: some of these are unique to add.  see "Special Operations"
  97             # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  98
  99             with m.State("special_cases"):
 100
 101                 s_nomatch = Signal()
 102                 m.d.comb += s_nomatch.eq(a.s != b.s)
 103
 104                 m_match = Signal()
 105                 m.d.comb += m_match.eq(a.m == b.m)
 106
 107                 # if a is NaN or b is NaN return NaN
 108                 with m.If(a.is_nan | b.is_nan):
 109                     m.next = "put_z"
 110                     m.d.sync += z.nan(1)
 111
 112                 # XXX WEIRDNESS for FP16 non-canonical NaN handling
 113                 # under review
 114
 115                 ## if a is zero and b is NaN return -b
 116                 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 117                 #    m.next = "put_z"
 118                 #    m.d.sync += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 119
 120                 ## if b is zero and a is NaN return -a
 121                 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 122                 #    m.next = "put_z"
 123                 #    m.d.sync += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 124
 125                 ## if a is -zero and b is NaN return -b
 126                 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 127                 #    m.next = "put_z"
 128                 #    m.d.sync += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 129
 130                 ## if b is -zero and a is NaN return -a
 131                 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 132                 #    m.next = "put_z"
 133                 #    m.d.sync += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 134
 135                 # if a is inf return inf (or NaN)
 136                 with m.Elif(a.is_inf):
 137                     m.next = "put_z"
 138                     m.d.sync += z.inf(a.s)
 139                     # if a is inf and signs don't match return NaN
 140                     with m.If(b.exp_128 & s_nomatch):
 141                         m.d.sync += z.nan(1)
 142
 143                 # if b is inf return inf
 144                 with m.Elif(b.is_inf):
 145                     m.next = "put_z"
 146                     m.d.sync += z.inf(b.s)
 147
 148                 # if a is zero and b zero return signed-a/b
 149                 with m.Elif(a.is_zero & b.is_zero):
 150                     m.next = "put_z"
 151                     m.d.sync += z.create(a.s & b.s, b.e, b.m[3:-1])
 152
 153                 # if a is zero return b
 154                 with m.Elif(a.is_zero):
 155                     m.next = "put_z"
 156                     m.d.sync += z.create(b.s, b.e, b.m[3:-1])
 157
 158                 # if b is zero return a
 159                 with m.Elif(b.is_zero):
 160                     m.next = "put_z"
 161                     m.d.sync += z.create(a.s, a.e, a.m[3:-1])
 162
 163                 # if a equal to -b return zero (+ve zero)
 164                 with m.Elif(s_nomatch & m_match & (a.e == b.e)):
 165                     m.next = "put_z"
 166                     m.d.sync += z.zero(0)
 167
 168                 # Denormalised Number checks
 169                 with m.Else():
 170                     m.next = "denormalise"
 171
 172             # ******
 173             # denormalise.
 174
 175             with m.State("denormalise"):
 176                 # Denormalised Number checks
 177                 m.next = "align"
 178                 self.denormalise(m, a)
 179                 self.denormalise(m, b)
 180
 181             # ******
 182             # align.
 183
 184             with m.State("align"):
 185                 if not self.single_cycle:
 186                     # NOTE: this does *not* do single-cycle multi-shifting,
 187                     #       it *STAYS* in the align state until exponents match
 188
 189                     # exponent of a greater than b: shift b down
 190                     with m.If(a.e > b.e):
 191                         m.d.sync += b.shift_down()
 192                     # exponent of b greater than a: shift a down
 193                     with m.Elif(a.e < b.e):
 194                         m.d.sync += a.shift_down()
 195                     # exponents equal: move to next stage.
 196                     with m.Else():
 197                         m.next = "add_0"
 198                 else:
 199                     # This one however (single-cycle) will do the shift
 200                     # in one go.
 201
 202                     # XXX TODO: the shifter used here is quite expensive
 203                     # having only one would be better
 204
 205                     ediff = Signal((len(a.e), True), reset_less=True)
 206                     ediffr = Signal((len(a.e), True), reset_less=True)
 207                     m.d.comb += ediff.eq(a.e - b.e)
 208                     m.d.comb += ediffr.eq(b.e - a.e)
 209                     with m.If(ediff > 0):
 210                         m.d.sync += b.shift_down_multi(ediff)
 211                     # exponent of b greater than a: shift a down
 212                     with m.Elif(ediff < 0):
 213                         m.d.sync += a.shift_down_multi(ediffr)
 214
 215                     m.next = "add_0"
 216
 217             # ******
 218             # First stage of add.  covers same-sign (add) and subtract
 219             # special-casing when mantissas are greater or equal, to
 220             # give greatest accuracy.
 221
 222             with m.State("add_0"):
 223                 m.next = "add_1"
 224                 m.d.sync += z.e.eq(a.e)
 225                 # same-sign (both negative or both positive) add mantissas
 226                 with m.If(a.s == b.s):
 227                     m.d.sync += [
 228                         tot.eq(Cat(a.m, 0) + Cat(b.m, 0)),
 229                         z.s.eq(a.s)
 230                     ]
 231                 # a mantissa greater than b, use a
 232                 with m.Elif(a.m >= b.m):
 233                     m.d.sync += [
 234                         tot.eq(Cat(a.m, 0) - Cat(b.m, 0)),
 235                         z.s.eq(a.s)
 236                     ]
 237                 # b mantissa greater than a, use b
 238                 with m.Else():
 239                     m.d.sync += [
 240                         tot.eq(Cat(b.m, 0) - Cat(a.m, 0)),
 241                         z.s.eq(b.s)
 242                 ]
 243
 244             # ******
 245             # Second stage of add: preparation for normalisation.
 246             # detects when tot sum is too big (tot[27] is kinda a carry bit)
 247
 248             with m.State("add_1"):
 249                 m.next = "normalise_1"
 250                 # tot[27] gets set when the sum overflows. shift result down
 251                 with m.If(tot[-1]):
 252                     m.d.sync += [
 253                         z.m.eq(tot[4:]),
 254                         of.m0.eq(tot[4]),
 255                         of.guard.eq(tot[3]),
 256                         of.round_bit.eq(tot[2]),
 257                         of.sticky.eq(tot[1] | tot[0]),
 258                         z.e.eq(z.e + 1)
 259                 ]
 260                 # tot[27] zero case
 261                 with m.Else():
 262                     m.d.sync += [
 263                         z.m.eq(tot[3:]),
 264                         of.m0.eq(tot[3]),
 265                         of.guard.eq(tot[2]),
 266                         of.round_bit.eq(tot[1]),
 267                         of.sticky.eq(tot[0])
 268                 ]
 269
 270             # ******
 271             # First stage of normalisation.
 272
 273             with m.State("normalise_1"):
 274                 self.normalise_1(m, z, of, "normalise_2")
 275
 276             # ******
 277             # Second stage of normalisation.
 278
 279             with m.State("normalise_2"):
 280                 self.normalise_2(m, z, of, "round")
 281
 282             # ******
 283             # rounding stage
 284
 285             with m.State("round"):
 286                 self.roundz(m, z, of, "corrections")
 287
 288             # ******
 289             # correction stage
 290
 291             with m.State("corrections"):
 292                 self.corrections(m, z, "pack")
 293
 294             # ******
 295             # pack stage
 296
 297             with m.State("pack"):
 298                 self.pack(m, z, "put_z")
 299
 300             # ******
 301             # put_z stage
 302
 303             with m.State("put_z"):
 304                 self.put_z(m, z, self.out_z, "get_a")
 305
 306         return m
 307
 308
 309 if __name__ == "__main__":
 310     alu = FPADD(width=32)
 311     main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
 312
 313
 314     # works... but don't use, just do "python fname.py convert -t v"
 315     #print (verilog.convert(alu, ports=[
 316     #                        ports=alu.in_a.ports() + \
 317     #                              alu.in_b.ports() + \
 318     #                              alu.out_z.ports())