src/ieee754/fpsqrt/fsqrt.py

   1 from sfpy import Float32
   2
   3
   4 # XXX DO NOT USE, fails on num=65536.  wark-wark...
   5 def sqrtsimple(num):
   6     res = 0
   7     bit = 1
   8
   9     while (bit < num):
  10         bit <<= 2
  11
  12     while (bit != 0):
  13         if (num >= res + bit):
  14             num -= res + bit
  15             res = (res >> 1) + bit
  16         else:
  17             res >>= 1
  18         bit >>= 2
  19
  20     return res
  21
  22
  23 def sqrt(num):
  24     D = num # D is input (from num)
  25     Q = 0 # quotient
  26     R = 0 # remainder
  27     for i in range(64, -1, -1): # negative ranges are weird...
  28
  29         R = (R<<2)|((D>>(i+i))&3)
  30
  31         if R >= 0:
  32             R -= ((Q<<2)|1) # -Q01
  33         else:
  34             R += ((Q<<2)|3) # +Q11
  35
  36         Q <<= 1
  37         if R >= 0:
  38             Q |= 1 # new Q
  39
  40     if R < 0:
  41         R = R + ((Q<<1)|1)
  42
  43     return Q, R
  44
  45
  46 # grabbed these from unit_test_single (convenience, this is just experimenting)
  47
  48 def get_mantissa(x):
  49     return 0x7fffff & x
  50
  51 def get_exponent(x):
  52     return ((x & 0x7f800000) >> 23) - 127
  53
  54 def set_exponent(x, e):
  55     return (x & ~0x7f800000) | ((e+127) << 23)
  56
  57 def get_sign(x):
  58     return ((x & 0x80000000) >> 31)
  59
  60 # convert FP32 to s/e/m
  61 def create_fp32(s, e, m):
  62     """ receive sign, exponent, mantissa, return FP32 """
  63     return set_exponent((s << 31) | get_mantissa(m))
  64
  65 # convert s/e/m to FP32
  66 def decode_fp32(x):
  67     """ receive FP32, return sign, exponent, mantissa """
  68     return get_sign(x), get_exponent(x), get_mantissa(x)
  69
  70
  71 # main function, takes mantissa and exponent as separate arguments
  72 # returns a tuple, sqrt'd mantissa, sqrt'd exponent
  73
  74 def main(mantissa, exponent):
  75     if exponent & 1 != 0:
  76         # shift mantissa up, subtract 1 from exp to compensate
  77         mantissa <<= 1
  78         exponent -= 1
  79     m, r = sqrt(mantissa)
  80     return m, r, exponent >> 1
  81
  82
  83 #normalization function
  84 def normalise(s, m, e, lowbits):
  85     if (lowbits >= 2):
  86         m += 1
  87     if get_mantissa(m) == ((1<<24)-1):
  88         e += 1
  89
  90     # this is 2nd-stage normalisation.  can move it to a separate fn.
  91
  92     #if the num is NaN, then adjust (normalised NaN rather than de-normed NaN)
  93     if (e == 128 & m !=0):
  94         # these are in IEEE754 format, this function returns s,e,m not z
  95         z[31] = 1       # sign (so, s=1)
  96         z[30:23] = 255  # exponent (minus 128, so e = 127
  97         z[22] = 1       # high bit of mantissa, so m = 1<<22 i think
  98         z[21:0] = 0     # rest of mantissa is zero, so m = 1<<22 is good.
  99
 100     #if the num is Inf, then adjust (to normalised +/-INF)
 101     if (e == 128):
 102         # these are in IEEE754 format, this function returns s,e,m not z
 103         z[31] = s       # s is already s, so do nothing to s.
 104         z[30:23] = 255  # have to subtract 128, so e = 127 (again)
 105         z[22:0] = 0     # mantissa... so m=0
 106
 107     return s, m, e
 108
 109
 110 def fsqrt_test(x):
 111
 112     xbits = x.bits
 113     print ("x", x, type(x))
 114     sq_test = x.sqrt()
 115     print ("sqrt", sq_test)
 116
 117     print (xbits, type(xbits))
 118     s, e, m = decode_fp32(xbits)
 119     print("x decode", s, e, m, hex(m))
 120
 121     m |= 1<<23 # set top bit (the missing "1" from mantissa)
 122     m <<= 27
 123
 124     sm, sr, se = main(m, e)
 125     lowbits = sm & 0x3
 126     sm >>= 2
 127     sm = get_mantissa(sm)
 128     #sm += 2
 129
 130     s, sm, se = normalise(s, sm, se, lowbits)
 131
 132     print("our  sqrt", s, se, sm, hex(sm), bin(sm), "lowbits", lowbits,
 133                                                     "rem", hex(sr))
 134     if lowbits >= 2:
 135         print ("probably needs rounding (+1 on mantissa)")
 136
 137     sq_xbits = sq_test.bits
 138     s, e, m = decode_fp32(sq_xbits)
 139     print ("sf32 sqrt", s, e, m, hex(m), bin(m))
 140     print ()
 141
 142 if __name__ == '__main__':
 143
 144     # quick test up to 1000 of two sqrt functions
 145     for Q in range(1, int(1e4)):
 146         print(Q, sqrt(Q), sqrtsimple(Q), int(Q**0.5))
 147         assert int(Q**0.5) == sqrtsimple(Q), "Q sqrtsimpl fail %d" % Q
 148         assert int(Q**0.5) == sqrt(Q)[0], "Q sqrt fail %d" % Q
 149
 150     # quick mantissa/exponent demo
 151     for e in range(26):
 152         for m in range(26):
 153             ms, mr, es = main(m, e)
 154             print("m:%d e:%d sqrt: m:%d-%d e:%d" % (m, e, ms, mr, es))
 155
 156     x = Float32(1234.123456789)
 157     fsqrt_test(x)
 158     x = Float32(32.1)
 159     fsqrt_test(x)
 160     x = Float32(16.0)
 161     fsqrt_test(x)
 162     x = Float32(8.0)
 163     fsqrt_test(x)
 164     x = Float32(8.5)
 165     fsqrt_test(x)
 166     x = Float32(3.14159265358979323)
 167     fsqrt_test(x)
 168     x = Float32(12.99392923123123)
 169     fsqrt_test(x)
 170     x = Float32(0.123456)
 171     fsqrt_test(x)
 172
 173
 174
 175
 176 """
 177
 178 Notes:
 179 https://pdfs.semanticscholar.org/5060/4e9aff0e37089c4ab9a376c3f35761ffe28b.pdf
 180
 181 //This is the main code of integer sqrt function found here:http://verilogcodes.blogspot.com/2017/11/a-verilog-function-for-finding-square-root.html
 182 //
 183
 184 module testbench;
 185
 186 reg [15:0] sqr;
 187
 188 //Verilog function to find square root of a 32 bit number.
 189 //The output is 16 bit.
 190 function [15:0] sqrt;
 191     input [31:0] num;  //declare input
 192     //intermediate signals.
 193     reg [31:0] a;
 194     reg [15:0] q;
 195     reg [17:0] left,right,r;
 196     integer i;
 197 begin
 198     //initialize all the variables.
 199     a = num;
 200     q = 0;
 201     i = 0;
 202     left = 0;   //input to adder/sub
 203     right = 0;  //input to adder/sub
 204     r = 0;  //remainder
 205     //run the calculations for 16 iterations.
 206     for(i=0;i<16;i=i+1) begin
 207         right = {q,r[17],1'b1};
 208         left = {r[15:0],a[31:30]};
 209         a = {a[29:0],2'b00};    //left shift by 2 bits.
 210         if (r[17] == 1) //add if r is negative
 211             r = left + right;
 212         else    //subtract if r is positive
 213             r = left - right;
 214         q = {q[14:0],!r[17]};
 215     end
 216     sqrt = q;   //final assignment of output.
 217 end
 218 endfunction //end of Function
 219
 220
 221 c version (from paper linked from URL)
 222
 223 unsigned squart(D, r) /*Non-Restoring sqrt*/
 224     unsigned D; /*D:32-bit unsigned integer to be square rooted */
 225     int *r;
 226 {
 227     unsigned Q = 0; /*Q:16-bit unsigned integer (root)*/
 228     int R = 0; /*R:17-bit integer (remainder)*/
 229     int i;
 230     for (i = 15;i>=0;i--) /*for each root bit*/
 231     {
 232         if (R>=0)
 233         { /*new remainder:*/
            R = (R<<2)|((D>>(i+i))&3);
 235             R = R-((Q<<2)|1); /*-Q01*/
 236         }
 237         else
 238         { /*new remainder:*/
            R = (R<<2)|((D>>(i+i))&3);
 240             R = R+((Q<<2)|3); /*+Q11*/
 241         }
        if (R>=0) Q = (Q<<1)|1; /*new Q:*/
        else Q = (Q<<1)|0; /*new Q:*/
 244     }
 245
 246     /*remainder adjusting*/
 247     if (R<0) R = R+((Q<<1)|1);
 248     *r = R; /*return remainder*/
 249     return(Q); /*return root*/
 250 }
 251
 252 From wikipedia page:
 253
 254 short isqrt(short num) {
 255     short res = 0;
 256     short bit = 1 << 14; // The second-to-top bit is set: 1 << 30 for 32 bits
 257
 258     // "bit" starts at the highest power of four <= the argument.
 259     while (bit > num)
 260         bit >>= 2;
 261
 262     while (bit != 0) {
 263         if (num >= res + bit) {
 264             num -= res + bit;
 265             res = (res >> 1) + bit;
 266         }
 267         else
 268             res >>= 1;
 269         bit >>= 2;
 270     }
 271     return res;
 272 }
 273
 274 """