5ae2bb253fa1356ec89f2960930b884faf51a2db
[ieee754fpu.git] / src / ieee754 / fpsqrt / fsqrt.py
1 from sfpy import Float32
2
3
4 # XXX DO NOT USE, fails on num=65536. wark-wark...
def sqrtsimple(num):
    """ digit-by-digit integer square root: receive num, return the
        truncated root.

        a probe value steps down through the powers of four; at each
        step the root accumulated so far is halved and, when the trial
        value still fits into what is left of num, the probe is added
        into the root.
    """
    # grow the probe (always a power of four) until it is not below num
    probe = 1
    while probe < num:
        probe *= 4

    root = 0
    while probe:
        trial = root + probe
        root >>= 1
        if num >= trial:
            # trial fits: remove it from num and record this root bit
            num -= trial
            root += probe
        probe >>= 2

    return root
21
22
def sqrt(num):
    """ non-restoring integer square root.

        receive a non-negative integer, return the tuple (Q, R) where
        Q is the truncated square root and R = num - Q*Q.

        the input is consumed two bits at a time, most significant pair
        first, and each pair yields one bit of Q.  the partial remainder
        R is allowed to go negative between steps (it is never restored
        inside the loop); a single correction after the loop brings it
        back to the true, non-negative remainder.

        a minimum of 65 bit-pairs (130 input bits) is always processed;
        wider inputs get as many extra iterations as their bit length
        needs, so no high-order bits are ever ignored.

        raises ValueError if num is negative.
    """
    if num < 0:
        raise ValueError("sqrt requires a non-negative integer")

    D = num     # D is input (from num)
    Q = 0       # quotient (the root being built up)
    R = 0       # partial remainder, may be negative inside the loop

    # index of the highest bit-pair to examine: never fewer than 65
    # pairs, more when the input is wider than 130 bits.
    top = max(64, (int(D).bit_length() - 1) // 2)

    for i in range(top, -1, -1):
        # bring the next two input bits into the remainder.  shifting
        # and OR-ing in 0..3 never changes the sign of R, so testing the
        # sign after the shift is the same as testing it before.
        R = (R << 2) | ((D >> (i + i)) & 3)

        if R >= 0:
            R -= ((Q << 2) | 1)     # -Q01
        else:
            R += ((Q << 2) | 3)     # +Q11

        # new bit of Q is 1 exactly when the remainder stayed non-negative
        Q <<= 1
        if R >= 0:
            Q |= 1

    # remainder adjusting: undo the last (over-)subtraction
    if R < 0:
        R = R + ((Q << 1) | 1)

    return Q, R
44
45
46 # grabbed these from unit_test_single (convenience, this is just experimenting)
47
def get_mantissa(x):
    """ receive FP32 bit pattern, return its low 23 bits (the stored
        mantissa field, without any implicit leading one)
    """
    field_mask = (1 << 23) - 1
    return x & field_mask
50
def get_exponent(x):
    """ receive FP32 bit pattern, return bits 23..30 with the bias of
        127 subtracted
    """
    biased = (x >> 23) & 0xff
    return biased - 127
53
def set_exponent(x, e):
    """ receive FP32 bit pattern and an unbiased exponent, return the
        pattern with bits 23..30 cleared and e+127 OR-ed in at bit 23.
        (e+127 is not range-checked or masked here.)
    """
    exponent_field = 0xff << 23
    biased = e + 127
    return (x & ~exponent_field) | (biased << 23)
56
def get_sign(x):
    """ receive FP32 bit pattern, return bit 31 (the sign) as 0 or 1 """
    return (x >> 31) & 1
59
# convert s/e/m to FP32
def create_fp32(s, e, m):
    """ receive sign, exponent, mantissa, return FP32

        s is placed at bit 31, m is cut down to the 23-bit mantissa
        field by get_mantissa, and e is the unbiased exponent
        (set_exponent adds the bias of 127 before inserting it).
    """
    # set_exponent needs both the partial bit pattern and the exponent
    return set_exponent((s << 31) | get_mantissa(m), e)
64
# convert FP32 to s/e/m
def decode_fp32(x):
    """ receive FP32, return sign, exponent, mantissa

        the three fields come from get_sign, get_exponent and
        get_mantissa respectively, so the exponent is returned unbiased.
    """
    sign = get_sign(x)
    exponent = get_exponent(x)
    mantissa = get_mantissa(x)
    return sign, exponent, mantissa
69
70
# main function, takes mantissa and exponent as separate arguments
# returns a 3-tuple: sqrt'd mantissa, sqrt remainder, halved exponent

def main(mantissa, exponent):
    """ receive integer mantissa and exponent, return the tuple
        (root, remainder, exponent >> 1).

        an odd exponent cannot be halved exactly, so in that case the
        mantissa is doubled and the exponent reduced by one before the
        integer sqrt is taken.
    """
    odd = exponent & 1      # 1 when the exponent is odd, otherwise 0
    root, remainder = sqrt(mantissa << odd)
    return root, remainder, (exponent - odd) >> 1
81
82
83 #normalization function
def normalise(s, m, e, lowbits):
    """ round and normalise a result, return the tuple (s, m, e).

        s       -- sign bit
        m       -- mantissa; expected to be the 23-bit stored field
                   (this is what fsqrt_test passes in)
        e       -- unbiased exponent (128 is the Inf/NaN exponent, i.e.
                   255 once the bias of 127 is added back)
        lowbits -- the two bits that were shifted out below m

        stage 1: when lowbits >= 2 the mantissa is incremented; if that
        increment carries out of the 23-bit field the mantissa wraps to
        zero and the exponent goes up by one.

        stage 2: with e == 128 a non-zero mantissa is a NaN and is
        replaced by one fixed pattern (s=1, m=1<<22); a zero mantissa
        is an infinity, which is already in its final form.
    """
    if lowbits >= 2:
        m += 1
        if m == (1 << 23):
            # rounding carried out of the mantissa field
            m = 0
            e += 1

    # this is 2nd-stage normalisation. can move it to a separate fn.

    if e == 128:
        if m != 0:
            # NaN: return the normalised form (sign set, top mantissa
            # bit set, remaining mantissa bits clear)
            s = 1
            m = 1 << 22
        # otherwise +/-Inf: s is kept, m is already zero

    return s, m, e
108
109
def fsqrt_test(x):
    """ print a side-by-side comparison of x.sqrt() and the integer
        based square root built from main() and normalise().

        x must provide a .bits attribute and a .sqrt() method (the
        callers in this file pass sfpy Float32 values).  nothing is
        returned; all results go to stdout.
    """
    raw = x.bits
    print ("x", x, type(x))
    reference = x.sqrt()
    print ("sqrt", reference)

    print (raw, type(raw))
    sign, exp, frac = decode_fp32(raw)
    print("x decode", sign, exp, frac, hex(frac))

    # restore the missing top "1" of the mantissa, then widen by 27
    # bits so that the integer root keeps two bits below the mantissa
    widened = (frac | (1 << 23)) << 27

    root, rem, root_exp = main(widened, exp)
    lowbits = root & 0x3                    # the two bits about to be dropped
    root_frac = get_mantissa(root >> 2)

    sign, root_frac, root_exp = normalise(sign, root_frac, root_exp, lowbits)

    print("our sqrt", sign, root_exp, root_frac, hex(root_frac),
          bin(root_frac), "lowbits", lowbits,
          "rem", hex(rem))
    if lowbits >= 2:
        print ("probably needs rounding (+1 on mantissa)")

    # decode the reference result the same way for comparison
    ref_sign, ref_exp, ref_frac = decode_fp32(reference.bits)
    print ("sf32 sqrt", ref_sign, ref_exp, ref_frac, hex(ref_frac),
           bin(ref_frac))
    print ()
141
if __name__ == '__main__':

    # quick test of the two integer sqrt functions against the float
    # square root, for every value from 1 up to (not including) 10000
    for Q in range(1, int(1e4)):
        expected = int(Q**0.5)
        print(Q, sqrt(Q), sqrtsimple(Q), expected)
        assert expected == sqrtsimple(Q), "Q sqrtsimpl fail %d" % Q
        assert expected == sqrt(Q)[0], "Q sqrt fail %d" % Q

    # quick mantissa/exponent demo
    for e in range(26):
        for m in range(26):
            result = (m, e) + main(m, e)
            print("m:%d e:%d sqrt: m:%d-%d e:%d" % result)

    # compare against sfpy for a handful of sample values
    samples = (
        1234.123456789,
        32.1,
        16.0,
        8.0,
        8.5,
        3.14159265358979323,
        12.99392923123123,
        0.123456,
    )
    for value in samples:
        x = Float32(value)
        fsqrt_test(x)
172
173
174
175
176 """
177
178 Notes:
179 https://pdfs.semanticscholar.org/5060/4e9aff0e37089c4ab9a376c3f35761ffe28b.pdf
180
181 //This is the main code of integer sqrt function found here:http://verilogcodes.blogspot.com/2017/11/a-verilog-function-for-finding-square-root.html
182 //
183
184 module testbench;
185
186 reg [15:0] sqr;
187
188 //Verilog function to find square root of a 32 bit number.
189 //The output is 16 bit.
190 function [15:0] sqrt;
191 input [31:0] num; //declare input
192 //intermediate signals.
193 reg [31:0] a;
194 reg [15:0] q;
195 reg [17:0] left,right,r;
196 integer i;
197 begin
198 //initialize all the variables.
199 a = num;
200 q = 0;
201 i = 0;
202 left = 0; //input to adder/sub
203 right = 0; //input to adder/sub
204 r = 0; //remainder
205 //run the calculations for 16 iterations.
206 for(i=0;i<16;i=i+1) begin
207 right = {q,r[17],1'b1};
208 left = {r[15:0],a[31:30]};
209 a = {a[29:0],2'b00}; //left shift by 2 bits.
210 if (r[17] == 1) //add if r is negative
211 r = left + right;
212 else //subtract if r is positive
213 r = left - right;
214 q = {q[14:0],!r[17]};
215 end
216 sqrt = q; //final assignment of output.
217 end
218 endfunction //end of Function
219
220
221 c version (from paper linked from URL)
222
223 unsigned squart(D, r) /*Non-Restoring sqrt*/
224 unsigned D; /*D:32-bit unsigned integer to be square rooted */
225 int *r;
226 {
227 unsigned Q = 0; /*Q:16-bit unsigned integer (root)*/
228 int R = 0; /*R:17-bit integer (remainder)*/
229 int i;
230 for (i = 15;i>=0;i--) /*for each root bit*/
231 {
232 if (R>=0)
233 { /*new remainder:*/
234 R = (R<<2)|((D>>(i+i))&3);
235 R = R-((Q<<2)|1); /*-Q01*/
236 }
237 else
238 { /*new remainder:*/
239 R = (R<<2)|((D>>(i+i))&3);
240 R = R+((Q<<2)|3); /*+Q11*/
241 }
242 if (R>=0) Q = (Q<<1)|1; /*new Q:*/
243 else Q = (Q<<1)|0; /*new Q:*/
244 }
245
246 /*remainder adjusting*/
247 if (R<0) R = R+((Q<<1)|1);
248 *r = R; /*return remainder*/
249 return(Q); /*return root*/
250 }
251
252 From wikipedia page:
253
254 short isqrt(short num) {
255 short res = 0;
256 short bit = 1 << 14; // The second-to-top bit is set: 1 << 30 for 32 bits
257
258 // "bit" starts at the highest power of four <= the argument.
259 while (bit > num)
260 bit >>= 2;
261
262 while (bit != 0) {
263 if (num >= res + bit) {
264 num -= res + bit;
265 res = (res >> 1) + bit;
266 }
267 else
268 res >>= 1;
269 bit >>= 2;
270 }
271 return res;
272 }
273
274 """