src/ieee754/fpdiv/div2.py

   1 """IEEE Floating Point Divider
   2
   3 Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
   4 """
   5
from nmigen import Module, Signal, Elaboratable, Cat
from nmigen.cli import main, verilog

from ieee754.fpcommon.fpbase import FPNumBaseRecord, FPState, Overflow
from ieee754.fpcommon.postcalc import FPAddStage1Data
from ieee754.div_rem_sqrt_rsqrt.div_pipe import DivPipeOutputData
  12
  13
  14 class FPDivStage2Mod(FPState, Elaboratable):
  15     """ Second stage of div: preparation for normalisation.
  16     """
  17
  18     def __init__(self, pspec):
  19         self.pspec = pspec
  20         self.i = self.ispec()
  21         self.o = self.ospec()
  22
  23     def ispec(self):
  24         return DivPipeOutputData(self.pspec)  # Q/Rem in...
  25
  26     def ospec(self):
  27         # XXX REQUIRED.  MUST NOT BE CHANGED.  this is the format
  28         # required for ongoing processing (normalisation, correction etc.)
  29         return FPAddStage1Data(self.pspec)  # out to post-process
  30
  31     def process(self, i):
  32         return self.o
  33
  34     def setup(self, m, i):
  35         """ links module to inputs and outputs
  36         """
  37         m.submodules.div1 = self
  38         m.d.comb += self.i.eq(i)
  39
  40     def elaborate(self, platform):
  41         m = Module()
  42
  43         # copies sign and exponent and mantissa (mantissa and exponent to be
  44         # overridden below)
  45         m.d.comb += self.o.z.eq(self.i.z)
  46
  47         # TODO: this is "phase 3" of divide (the very end of the pipeline)
  48         # takes the Q and R data (whatever) and performs
  49         # last-stage guard/round/sticky and copies mantissa into z.
  50         # post-processing stages take care of things from that point.
  51
  52         # NOTE: this phase does NOT do ACTUAL DIV processing, it ONLY
  53         # does "conversion" *out* of the Q/REM last stage
  54
  55         # Operations and input/output mantissa ranges:
  56         # fdiv:
  57         #   dividend [1.0, 2.0)
  58         #   divisor [1.0, 2.0)
  59         #   result (0.5, 2.0)
  60         #
  61         # fsqrt:
  62         #   radicand [1.0, 4.0)
  63         #   result [1.0, 2.0)
  64         #
  65         # frsqrt:
  66         #   radicand [1.0, 4.0)
  67         #   result (0.5, 1.0]
  68
  69         # following section partially normalizes result to the range [1.0, 2.0)
  70
  71         qr_int_part = Signal(2, reset_less=True)
  72         m.d.comb += qr_int_part.eq(
  73             self.i.quotient_root[self.pspec.core_config.fract_width:][:2])
  74
  75         need_shift = Signal(reset_less=True)
  76
  77         # shift left when result is less than 2.0 since result_m has 1 more
  78         # fraction bit, making assigning to it the equivalent of dividing by 2.
  79         # this all comes out to:
  80         # if quotient_root < 2.0:
  81         #     # div by 2 from assign; mul by 2 from shift left
  82         #     result = (quotient_root * 2) / 2
  83         # else:
  84         #     # div by 2 from assign
  85         #     result = quotient_root / 2
  86         m.d.comb += need_shift.eq(qr_int_part < 2)
  87
  88         # one extra fraction bit to accommodate the result when not shifting
  89         # and for effective div by 2
  90         result_m_fract_width = self.pspec.core_config.fract_width + 1
  91         # 1 integer bit since the numbers are less than 2.0
  92         result_m = Signal(1 + result_m_fract_width, reset_less=True)
  93         result_e = Signal(len(self.i.z.e), reset_less=True)
  94
  95         m.d.comb += [
  96             result_m.eq(self.i.quotient_root << need_shift),
  97             result_e.eq(self.i.z.e + (1 - need_shift))
  98         ]
  99
 100         # result_m is now in the range [1.0, 2.0)
 101
 102         # FIXME: below comment block out of date
 103         # NOTE: see FPDivStage0Mod comment.  the quotient is assumed
 104         # to be in the range 0.499999-recurring to 1.999998.  normalisation
 105         # will take care of that, *however*, it *might* be necessary to
 106         # subtract 1 from the exponent and have one extra bit in the
 107         # mantissa to compensate.  this is pretty much exactly what's
 108         # done in FPMUL, due to 0.5-0.9999 * 0.5-0.9999 also producing
 109         # values within the range 0.5 to 1.999998
 110         # FIXME: above comment block out of date
 111
 112         with m.If(~self.i.out_do_z):  # FIXME: does this need to be conditional?
 113             m.d.comb += [
 114                 self.o.z.m.eq(result_m[3:]),
 115                 self.o.of.m0.eq(result_m[3]),  # copy of LSB
 116                 self.o.of.guard.eq(result_m[2]),
 117                 self.o.of.round_bit.eq(result_m[1]),
 118                 self.o.of.sticky.eq(result_m[0] | self.i.remainder.bool()),
 119                 self.o.z.e.eq(result_e),
 120             ]
 121
 122         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 123         m.d.comb += self.o.oz.eq(self.i.oz)
 124         m.d.comb += self.o.ctx.eq(self.i.ctx)
 125
 126         return m
 127
 128
 129 class FPDivStage2(FPState):
 130
 131     def __init__(self, pspec):
 132         FPState.__init__(self, "divider_1")
 133         self.mod = FPDivStage2Mod(pspec)
 134         self.out_z = FPNumBaseRecord(pspec, False)
 135         self.out_of = Overflow()
 136         self.norm_stb = Signal()
 137
 138     def setup(self, m, i):
 139         """ links module to inputs and outputs
 140         """
 141         self.mod.setup(m, i)
 142
 143         m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in div1 state
 144
 145         m.d.sync += self.out_of.eq(self.mod.out_of)
 146         m.d.sync += self.out_z.eq(self.mod.out_z)
 147         m.d.sync += self.norm_stb.eq(1)
 148
 149     def action(self, m):
 150         m.next = "normalise_1"