src/libm/e_sqrt.c
changeset 2756 a98604b691c8
child 3162 dc1eb82ffdaa
equal deleted inserted replaced
2755:2a3ec308d995 2756:a98604b691c8
       
     1 /* @(#)e_sqrt.c 5.1 93/09/24 */
       
     2 /*
       
     3  * ====================================================
       
     4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       
     5  *
       
     6  * Developed at SunPro, a Sun Microsystems, Inc. business.
       
     7  * Permission to use, copy, modify, and distribute this
       
     8  * software is freely granted, provided that this notice
       
     9  * is preserved.
       
    10  * ====================================================
       
    11  */
       
    12 
       
    13 #if defined(LIBM_SCCS) && !defined(lint)
       
    14 static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
       
    15 #endif
       
    16 
       
    17 /* __ieee754_sqrt(x)
       
    18  * Return correctly rounded sqrt.
       
    19  *           ------------------------------------------
       
    20  *	     |  Use the hardware sqrt if you have one |
       
    21  *           ------------------------------------------
       
    22  * Method:
       
    23  *   Bit by bit method using integer arithmetic. (Slow, but portable)
       
    24  *   1. Normalization
       
    25  *	Scale x to y in [1,4) with even powers of 2:
       
    26  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
       
    27  *		sqrt(x) = 2^k * sqrt(y)
       
    28  *   2. Bit by bit computation
       
    29  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
       
    30  *	     i							 0
       
    31  *                                     i+1         2
       
    32  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
       
    33  *	     i      i            i                 i
       
    34  *
       
    35  *	To compute q    from q , one checks whether
       
    36  *		    i+1       i
       
    37  *
       
    38  *			      -(i+1) 2
       
    39  *			(q + 2      ) <= y.			(2)
       
    40  *     			  i
       
    41  *							      -(i+1)
       
    42  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
       
    43  *		 	       i+1   i             i+1   i
       
    44  *
       
    45  *	With some algebric manipulation, it is not difficult to see
       
    46  *	that (2) is equivalent to
       
    47  *                             -(i+1)
       
    48  *			s  +  2       <= y			(3)
       
    49  *			 i                i
       
    50  *
       
    51  *	The advantage of (3) is that s  and y  can be computed by
       
    52  *				      i      i
       
    53  *	the following recurrence formula:
       
    54  *	    if (3) is false
       
    55  *
       
    56  *	    s     =  s  ,	y    = y   ;			(4)
       
    57  *	     i+1      i		 i+1    i
       
    58  *
       
    59  *	    otherwise,
       
    60  *                         -i                     -(i+1)
       
    61  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
       
    62  *           i+1      i          i+1    i     i
       
    63  *
       
    64  *	One may easily use induction to prove (4) and (5).
       
    65  *	Note. Since the left hand side of (3) contain only i+2 bits,
       
    66  *	      it does not necessary to do a full (53-bit) comparison
       
    67  *	      in (3).
       
    68  *   3. Final rounding
       
    69  *	After generating the 53 bits result, we compute one more bit.
       
    70  *	Together with the remainder, we can decide whether the
       
    71  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
       
    72  *	(it will never equal to 1/2ulp).
       
    73  *	The rounding mode can be detected by checking whether
       
    74  *	huge + tiny is equal to huge, and whether huge - tiny is
       
    75  *	equal to huge for some floating point number "huge" and "tiny".
       
    76  *
       
    77  * Special cases:
       
    78  *	sqrt(+-0) = +-0 	... exact
       
    79  *	sqrt(inf) = inf
       
    80  *	sqrt(-ve) = NaN		... with invalid signal
       
    81  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
       
    82  *
       
    83  * Other methods : see the appended file at the end of the program below.
       
    84  *---------------
       
    85  */
       
    86 
       
    87 #include "math.h"
       
    88 #include "math_private.h"
       
    89 
       
    90 #ifdef __STDC__
       
    91 static const double one = 1.0, tiny = 1.0e-300;
       
    92 #else
       
    93 static double one = 1.0, tiny = 1.0e-300;
       
    94 #endif
       
    95 
       
    96 #ifdef __STDC__
       
    97 double attribute_hidden
       
    98 __ieee754_sqrt(double x)
       
    99 #else
       
   100 double attribute_hidden
       
   101 __ieee754_sqrt(x)
       
   102      double x;
       
   103 #endif
       
   104 {
       
   105     double z;
       
   106     int32_t sign = (int) 0x80000000;
       
   107     int32_t ix0, s0, q, m, t, i;
       
   108     u_int32_t r, t1, s1, ix1, q1;
       
   109 
       
   110     EXTRACT_WORDS(ix0, ix1, x);
       
   111 
       
   112     /* take care of Inf and NaN */
       
   113     if ((ix0 & 0x7ff00000) == 0x7ff00000) {
       
   114         return x * x + x;       /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
       
   115                                    sqrt(-inf)=sNaN */
       
   116     }
       
   117     /* take care of zero */
       
   118     if (ix0 <= 0) {
       
   119         if (((ix0 & (~sign)) | ix1) == 0)
       
   120             return x;           /* sqrt(+-0) = +-0 */
       
   121         else if (ix0 < 0)
       
   122             return (x - x) / (x - x);   /* sqrt(-ve) = sNaN */
       
   123     }
       
   124     /* normalize x */
       
   125     m = (ix0 >> 20);
       
   126     if (m == 0) {               /* subnormal x */
       
   127         while (ix0 == 0) {
       
   128             m -= 21;
       
   129             ix0 |= (ix1 >> 11);
       
   130             ix1 <<= 21;
       
   131         }
       
   132         for (i = 0; (ix0 & 0x00100000) == 0; i++)
       
   133             ix0 <<= 1;
       
   134         m -= i - 1;
       
   135         ix0 |= (ix1 >> (32 - i));
       
   136         ix1 <<= i;
       
   137     }
       
   138     m -= 1023;                  /* unbias exponent */
       
   139     ix0 = (ix0 & 0x000fffff) | 0x00100000;
       
   140     if (m & 1) {                /* odd m, double x to make it even */
       
   141         ix0 += ix0 + ((ix1 & sign) >> 31);
       
   142         ix1 += ix1;
       
   143     }
       
   144     m >>= 1;                    /* m = [m/2] */
       
   145 
       
   146     /* generate sqrt(x) bit by bit */
       
   147     ix0 += ix0 + ((ix1 & sign) >> 31);
       
   148     ix1 += ix1;
       
   149     q = q1 = s0 = s1 = 0;       /* [q,q1] = sqrt(x) */
       
   150     r = 0x00200000;             /* r = moving bit from right to left */
       
   151 
       
   152     while (r != 0) {
       
   153         t = s0 + r;
       
   154         if (t <= ix0) {
       
   155             s0 = t + r;
       
   156             ix0 -= t;
       
   157             q += r;
       
   158         }
       
   159         ix0 += ix0 + ((ix1 & sign) >> 31);
       
   160         ix1 += ix1;
       
   161         r >>= 1;
       
   162     }
       
   163 
       
   164     r = sign;
       
   165     while (r != 0) {
       
   166         t1 = s1 + r;
       
   167         t = s0;
       
   168         if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
       
   169             s1 = t1 + r;
       
   170             if (((t1 & sign) == sign) && (s1 & sign) == 0)
       
   171                 s0 += 1;
       
   172             ix0 -= t;
       
   173             if (ix1 < t1)
       
   174                 ix0 -= 1;
       
   175             ix1 -= t1;
       
   176             q1 += r;
       
   177         }
       
   178         ix0 += ix0 + ((ix1 & sign) >> 31);
       
   179         ix1 += ix1;
       
   180         r >>= 1;
       
   181     }
       
   182 
       
   183     /* use floating add to find out rounding direction */
       
   184     if ((ix0 | ix1) != 0) {
       
   185         z = one - tiny;         /* trigger inexact flag */
       
   186         if (z >= one) {
       
   187             z = one + tiny;
       
   188             if (q1 == (u_int32_t) 0xffffffff) {
       
   189                 q1 = 0;
       
   190                 q += 1;
       
   191             } else if (z > one) {
       
   192                 if (q1 == (u_int32_t) 0xfffffffe)
       
   193                     q += 1;
       
   194                 q1 += 2;
       
   195             } else
       
   196                 q1 += (q1 & 1);
       
   197         }
       
   198     }
       
   199     ix0 = (q >> 1) + 0x3fe00000;
       
   200     ix1 = q1 >> 1;
       
   201     if ((q & 1) == 1)
       
   202         ix1 |= sign;
       
   203     ix0 += (m << 20);
       
   204     INSERT_WORDS(z, ix0, ix1);
       
   205     return z;
       
   206 }
       
   207 
       
   208 /*
       
   209 Other methods  (use floating-point arithmetic)
       
   210 -------------
       
   211 (This is a copy of a drafted paper by Prof W. Kahan
       
   212 and K.C. Ng, written in May, 1986)
       
   213 
       
   214 	Two algorithms are given here to implement sqrt(x)
       
   215 	(IEEE double precision arithmetic) in software.
       
   216 	Both supply sqrt(x) correctly rounded. The first algorithm (in
       
   217 	Section A) uses newton iterations and involves four divisions.
       
   218 	The second one uses reciproot iterations to avoid division, but
       
   219 	requires more multiplications. Both algorithms need the ability
       
   220 	to chop results of arithmetic operations instead of round them,
       
   221 	and the INEXACT flag to indicate when an arithmetic operation
       
   222 	is executed exactly with no roundoff error, all part of the
       
   223 	standard (IEEE 754-1985). The ability to perform shift, add,
       
   224 	subtract and logical AND operations upon 32-bit words is needed
       
   225 	too, though not part of the standard.
       
   226 
       
   227 A.  sqrt(x) by Newton Iteration
       
   228 
       
   229    (1)	Initial approximation
       
   230 
       
   231 	Let x0 and x1 be the leading and the trailing 32-bit words of
       
   232 	a floating point number x (in IEEE double format) respectively
       
   233 
       
   234 	    1    11		     52				  ...widths
       
   235 	   ------------------------------------------------------
       
   236 	x: |s|	  e     |	      f				|
       
   237 	   ------------------------------------------------------
       
   238 	      msb    lsb  msb				      lsb ...order
       
   239 
       
   240 
       
   241 	     ------------------------  	     ------------------------
       
   242 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
       
   243 	     ------------------------  	     ------------------------
       
   244 
       
   245 	By performing shifts and subtracts on x0 and x1 (both regarded
       
   246 	as integers), we obtain an 8-bit approximation of sqrt(x) as
       
   247 	follows.
       
   248 
       
   249 		k  := (x0>>1) + 0x1ff80000;
       
   250 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
       
   251 	Here k is a 32-bit integer and T1[] is an integer array containing
       
   252 	correction terms. Now magically the floating value of y (y's
       
   253 	leading 32-bit word is y0, the value of its trailing word is 0)
       
   254 	approximates sqrt(x) to almost 8-bit.
       
   255 
       
   256 	Value of T1:
       
   257 	static int T1[32]= {
       
   258 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
       
   259 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
       
   260 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
       
   261 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
       
   262 
       
   263     (2)	Iterative refinement
       
   264 
       
   265 	Apply Heron's rule three times to y, we have y approximates
       
   266 	sqrt(x) to within 1 ulp (Unit in the Last Place):
       
   267 
       
   268 		y := (y+x/y)/2		... almost 17 sig. bits
       
   269 		y := (y+x/y)/2		... almost 35 sig. bits
       
   270 		y := y-(y-x/y)/2	... within 1 ulp
       
   271 
       
   272 
       
   273 	Remark 1.
       
   274 	    Another way to improve y to within 1 ulp is:
       
   275 
       
   276 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
       
   277 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
       
   278 
       
   279 				2
       
   280 			    (x-y )*y
       
   281 		y := y + 2* ----------	...within 1 ulp
       
   282 			       2
       
   283 			     3y  + x
       
   284 
       
   285 
       
   286 	This formula has one division fewer than the one above; however,
       
   287 	it requires more multiplications and additions. Also x must be
       
   288 	scaled in advance to avoid spurious overflow in evaluating the
       
   289 	expression 3y*y+x. Hence it is not recommended uless division
       
   290 	is slow. If division is very slow, then one should use the
       
   291 	reciproot algorithm given in section B.
       
   292 
       
   293     (3) Final adjustment
       
   294 
       
   295 	By twiddling y's last bit it is possible to force y to be
       
   296 	correctly rounded according to the prevailing rounding mode
       
   297 	as follows. Let r and i be copies of the rounding mode and
       
   298 	inexact flag before entering the square root program. Also we
       
   299 	use the expression y+-ulp for the next representable floating
       
   300 	numbers (up and down) of y. Note that y+-ulp = either fixed
       
   301 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
       
   302 	mode.
       
   303 
       
   304 		I := FALSE;	... reset INEXACT flag I
       
   305 		R := RZ;	... set rounding mode to round-toward-zero
       
   306 		z := x/y;	... chopped quotient, possibly inexact
       
   307 		If(not I) then {	... if the quotient is exact
       
   308 		    if(z=y) {
       
   309 		        I := i;	 ... restore inexact flag
       
   310 		        R := r;  ... restore rounded mode
       
   311 		        return sqrt(x):=y.
       
   312 		    } else {
       
   313 			z := z - ulp;	... special rounding
       
   314 		    }
       
   315 		}
       
   316 		i := TRUE;		... sqrt(x) is inexact
       
   317 		If (r=RN) then z=z+ulp	... rounded-to-nearest
       
   318 		If (r=RP) then {	... round-toward-+inf
       
   319 		    y = y+ulp; z=z+ulp;
       
   320 		}
       
   321 		y := y+z;		... chopped sum
       
   322 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
       
   323 	        I := i;	 		... restore inexact flag
       
   324 	        R := r;  		... restore rounded mode
       
   325 	        return sqrt(x):=y.
       
   326 
       
   327     (4)	Special cases
       
   328 
       
   329 	Square root of +inf, +-0, or NaN is itself;
       
   330 	Square root of a negative number is NaN with invalid signal.
       
   331 
       
   332 
       
   333 B.  sqrt(x) by Reciproot Iteration
       
   334 
       
   335    (1)	Initial approximation
       
   336 
       
   337 	Let x0 and x1 be the leading and the trailing 32-bit words of
       
   338 	a floating point number x (in IEEE double format) respectively
       
   339 	(see section A). By performing shifs and subtracts on x0 and y0,
       
   340 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
       
   341 
       
   342 	    k := 0x5fe80000 - (x0>>1);
       
   343 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
       
   344 
       
   345 	Here k is a 32-bit integer and T2[] is an integer array
       
   346 	containing correction terms. Now magically the floating
       
   347 	value of y (y's leading 32-bit word is y0, the value of
       
   348 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
       
   349 	to almost 7.8-bit.
       
   350 
       
   351 	Value of T2:
       
   352 	static int T2[64]= {
       
   353 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
       
   354 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
       
   355 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
       
   356 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
       
   357 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
       
   358 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
       
   359 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
       
   360 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
       
   361 
       
   362     (2)	Iterative refinement
       
   363 
       
   364 	Apply Reciproot iteration three times to y and multiply the
       
   365 	result by x to get an approximation z that matches sqrt(x)
       
   366 	to about 1 ulp. To be exact, we will have
       
   367 		-1ulp < sqrt(x)-z<1.0625ulp.
       
   368 
       
   369 	... set rounding mode to Round-to-nearest
       
   370 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
       
   371 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
       
   372 	... special arrangement for better accuracy
       
   373 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
       
   374 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
       
   375 
       
   376 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
       
   377 	(a) the term z*y in the final iteration is always less than 1;
       
   378 	(b) the error in the final result is biased upward so that
       
   379 		-1 ulp < sqrt(x) - z < 1.0625 ulp
       
   380 	    instead of |sqrt(x)-z|<1.03125ulp.
       
   381 
       
   382     (3)	Final adjustment
       
   383 
       
   384 	By twiddling y's last bit it is possible to force y to be
       
   385 	correctly rounded according to the prevailing rounding mode
       
   386 	as follows. Let r and i be copies of the rounding mode and
       
   387 	inexact flag before entering the square root program. Also we
       
   388 	use the expression y+-ulp for the next representable floating
       
   389 	numbers (up and down) of y. Note that y+-ulp = either fixed
       
   390 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
       
   391 	mode.
       
   392 
       
   393 	R := RZ;		... set rounding mode to round-toward-zero
       
   394 	switch(r) {
       
   395 	    case RN:		... round-to-nearest
       
   396 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
       
   397 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
       
   398 	       break;
       
   399 	    case RZ:case RM:	... round-to-zero or round-to--inf
       
   400 	       R:=RP;		... reset rounding mod to round-to-+inf
       
   401 	       if(x<z*z ... rounded up) z = z - ulp; else
       
   402 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
       
   403 	       break;
       
   404 	    case RP:		... round-to-+inf
       
   405 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
       
   406 	       if(x>z*z ...chopped) z = z+ulp;
       
   407 	       break;
       
   408 	}
       
   409 
       
   410 	Remark 3. The above comparisons can be done in fixed point. For
       
   411 	example, to compare x and w=z*z chopped, it suffices to compare
       
   412 	x1 and w1 (the trailing parts of x and w), regarding them as
       
   413 	two's complement integers.
       
   414 
       
   415 	...Is z an exact square root?
       
   416 	To determine whether z is an exact square root of x, let z1 be the
       
   417 	trailing part of z, and also let x0 and x1 be the leading and
       
   418 	trailing parts of x.
       
   419 
       
   420 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
       
   421 	    I := 1;		... Raise Inexact flag: z is not exact
       
   422 	else {
       
   423 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
       
   424 	    k := z1 >> 26;		... get z's 25-th and 26-th
       
   425 					    fraction bits
       
   426 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
       
   427 	}
       
   428 	R:= r		... restore rounded mode
       
   429 	return sqrt(x):=z.
       
   430 
       
   431 	If multiplication is cheaper then the foregoing red tape, the
       
   432 	Inexact flag can be evaluated by
       
   433 
       
   434 	    I := i;
       
   435 	    I := (z*z!=x) or I.
       
   436 
       
   437 	Note that z*z can overwrite I; this value must be sensed if it is
       
   438 	True.
       
   439 
       
   440 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
       
   441 	zero.
       
   442 
       
   443 		    --------------------
       
   444 		z1: |        f2        |
       
   445 		    --------------------
       
   446 		bit 31		   bit 0
       
   447 
       
   448 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
       
   449 	or even of logb(x) have the following relations:
       
   450 
       
   451 	-------------------------------------------------
       
   452 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
       
   453 	-------------------------------------------------
       
   454 	00			00		odd and even
       
   455 	01			01		even
       
   456 	10			10		odd
       
   457 	10			00		even
       
   458 	11			01		even
       
   459 	-------------------------------------------------
       
   460 
       
   461     (4)	Special cases (see (4) of Section A).
       
   462 
       
   463  */