correct FPRSQRT specialcases
[ieee754fpu.git] / src / ieee754 / fpdiv / div2.py
1 """IEEE Floating Point Divider
2
3 Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
4 """
5
6 from nmigen import Module, Signal, Elaboratable, Cat
7 from nmigen.cli import main, verilog
8
9 from ieee754.fpcommon.fpbase import FPState
10 from ieee754.fpcommon.postcalc import FPAddStage1Data
11 from ieee754.div_rem_sqrt_rsqrt.div_pipe import DivPipeOutputData
12
13
class FPDivStage2Mod(FPState, Elaboratable):
    """ Last stage of the divide pipeline: converts the quotient/root and
        remainder into the mantissa, exponent and guard/round/sticky bits
        consumed by the normalisation stages.  No division is done here.
    """

    def __init__(self, pspec):
        self.pspec = pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        # quotient/root and remainder arriving from the previous stage
        return DivPipeOutputData(self.pspec)

    def ospec(self):
        # XXX REQUIRED. MUST NOT BE CHANGED.  the post-processing stages
        # (normalisation, correction etc.) rely on exactly this format.
        return FPAddStage1Data(self.pspec)

    def process(self, i):
        # the argument is not used: the output record is handed back as-is
        return self.o

    def setup(self, m, i):
        """ registers this module as a submodule and wires up its input
        """
        m.submodules.div1 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        fract_width = self.pspec.core_config.fract_width
        inp = self.i
        out = self.o

        # start from a straight copy of the incoming number; the mantissa
        # and exponent are overridden further down
        m.d.comb += out.z.eq(inp.z)

        # TODO: this is "phase 3" of divide, the very end of the pipeline.
        # it takes Q and R, derives the last-stage guard/round/sticky bits
        # and copies the mantissa into z.  everything after that is left to
        # the post-processing stages.

        # mantissa ranges per operation:
        #
        #   op      input(s)                          result
        #   fdiv    dividend, divisor in [1.0, 2.0)   (0.5, 2.0)
        #   fsqrt   radicand in [1.0, 4.0)            [1.0, 2.0)
        #   frsqrt  radicand in [1.0, 4.0)            (0.5, 1.0]

        # --- partial normalisation of the result into [1.0, 2.0) ---

        # the two bits sitting directly above the fraction bits
        qr_int_part = Signal(2, reset_less=True)
        above_fraction = inp.quotient_root[fract_width:]
        m.d.comb += qr_int_part.eq(above_fraction[:2])

        # result_m carries one more fraction bit than quotient_root, so a
        # plain assignment to it already acts as a divide by 2:
        #   quotient_root <  2.0: shift left, result = (quotient_root * 2) / 2
        #   quotient_root >= 2.0: no shift,   result = quotient_root / 2
        need_shift = Signal(reset_less=True)
        m.d.comb += need_shift.eq(qr_int_part < 2)

        # the extra fraction bit holds the result when no shift is done and
        # provides the effective divide by 2
        result_m_fract_width = fract_width + 1
        # a single integer bit is enough: the value is below 2.0
        result_m = Signal(1 + result_m_fract_width, reset_less=True)
        result_e = Signal(len(inp.z.e), reset_less=True)

        m.d.comb += result_m.eq(inp.quotient_root << need_shift)
        m.d.comb += result_e.eq(inp.z.e + (1 - need_shift))

        # from here on result_m lies in [1.0, 2.0)

        # FIXME: an older note at this point (pointing at FPDivStage0Mod and
        # FPMUL, and assuming a quotient range of roughly 0.4999 to 1.9999
        # with a possible exponent decrement) was marked as out of date.

        # FIXME: does this need to be conditional?
        with m.If(~inp.out_do_z):
            m.d.comb += out.z.m.eq(result_m[3:])
            m.d.comb += out.of.m0.eq(result_m[3])  # copy of LSB
            m.d.comb += out.of.guard.eq(result_m[2])
            m.d.comb += out.of.round_bit.eq(result_m[1])
            # a non-zero remainder also counts towards sticky
            m.d.comb += out.of.sticky.eq(result_m[0] | inp.remainder.bool())
            m.d.comb += out.z.e.eq(result_e)

        # these are passed through unchanged
        m.d.comb += [
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
            out.ctx.eq(inp.ctx),
        ]

        return m
127
128
class FPDivStage2(FPState):
    """ State wrapper around FPDivStage2Mod: registers the module's
        mantissa/exponent record and overflow bits, then moves on to the
        "normalise_1" state.
    """

    def __init__(self, pspec):
        # FPNumBaseRecord and Overflow are not among the module-level
        # imports, so they are brought into scope here.
        # NOTE(review): assumed to live next to FPState in
        # ieee754.fpcommon.fpbase - confirm.
        from ieee754.fpcommon.fpbase import FPNumBaseRecord, Overflow

        FPState.__init__(self, "divider_1")
        self.mod = FPDivStage2Mod(pspec)
        self.out_z = FPNumBaseRecord(pspec, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in div1 state

        # FPDivStage2Mod exposes its results through its output record
        # (o.of / o.z); it has no out_of / out_z attributes of its own.
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this and the eq(0) above are both unconditional in
        # this method, so this later assignment is the one that applies.
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        # next FSM state once this one has run
        m.next = "normalise_1"