
1 /* @(#)e_sqrt.c 5.1 93/09/24 */ 

2 /* 

3 * ==================================================== 

4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 

5 * 

6 * Developed at SunPro, a Sun Microsystems, Inc. business. 

7 * Permission to use, copy, modify, and distribute this 

8 * software is freely granted, provided that this notice 

9 * is preserved. 

10 * ==================================================== 

11 */ 

12 

13 #if defined(LIBM_SCCS) && !defined(lint) 

14 static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $"; 

15 #endif 

16 

17 /* __ieee754_sqrt(x) 

18 * Return correctly rounded sqrt. 

19 *  

20 *  Use the hardware sqrt if you have one  

21 *  

22 * Method: 

23 * Bit by bit method using integer arithmetic. (Slow, but portable) 

24 * 1. Normalization 

25 * Scale x to y in [1,4) with even powers of 2: 

26 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 

27 * sqrt(x) = 2^k * sqrt(y) 

28 * 2. Bit by bit computation 

29 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 

30 * i 0 

31 * i+1 2 

32 * s = 2*q , and y = 2 * ( y  q ). (1) 

33 * i i i i 

34 * 

35 * To compute q from q , one checks whether 

36 * i+1 i 

37 * 

38 * (i+1) 2 

39 * (q + 2 ) <= y. (2) 

40 * i 

41 * (i+1) 

42 * If (2) is false, then q = q ; otherwise q = q + 2 . 

43 * i+1 i i+1 i 

44 * 

45 * With some algebric manipulation, it is not difficult to see 

46 * that (2) is equivalent to 

47 * (i+1) 

48 * s + 2 <= y (3) 

49 * i i 

50 * 

51 * The advantage of (3) is that s and y can be computed by 

52 * i i 

53 * the following recurrence formula: 

54 * if (3) is false 

55 * 

56 * s = s , y = y ; (4) 

57 * i+1 i i+1 i 

58 * 

59 * otherwise, 

60 * i (i+1) 

61 * s = s + 2 , y = y  s  2 (5) 

62 * i+1 i i+1 i i 

63 * 

64 * One may easily use induction to prove (4) and (5). 

65 * Note. Since the left hand side of (3) contain only i+2 bits, 

66 * it does not necessary to do a full (53bit) comparison 

67 * in (3). 

68 * 3. Final rounding 

69 * After generating the 53 bits result, we compute one more bit. 

70 * Together with the remainder, we can decide whether the 

71 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 

72 * (it will never equal to 1/2ulp). 

73 * The rounding mode can be detected by checking whether 

74 * huge + tiny is equal to huge, and whether huge  tiny is 

75 * equal to huge for some floating point number "huge" and "tiny". 

76 * 

77 * Special cases: 

78 * sqrt(+0) = +0 ... exact 

79 * sqrt(inf) = inf 

80 * sqrt(ve) = NaN ... with invalid signal 

81 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 

82 * 

83 * Other methods : see the appended file at the end of the program below. 

84 * 

85 */ 

86 

87 #include "math.h" 

88 #include "math_private.h" 

89 

90 #ifdef __STDC__ 

91 static const double one = 1.0, tiny = 1.0e300; 

92 #else 

93 static double one = 1.0, tiny = 1.0e300; 

94 #endif 

95 

96 #ifdef __STDC__ 

97 double attribute_hidden 

98 __ieee754_sqrt(double x) 

99 #else 

100 double attribute_hidden 

101 __ieee754_sqrt(x) 

102 double x; 

103 #endif 

104 { 

105 double z; 

106 int32_t sign = (int) 0x80000000; 

107 int32_t ix0, s0, q, m, t, i; 

108 u_int32_t r, t1, s1, ix1, q1; 

109 

110 EXTRACT_WORDS(ix0, ix1, x); 

111 

112 /* take care of Inf and NaN */ 

113 if ((ix0 & 0x7ff00000) == 0x7ff00000) { 

114 return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 

115 sqrt(inf)=sNaN */ 

116 } 

117 /* take care of zero */ 

118 if (ix0 <= 0) { 

119 if (((ix0 & (~sign))  ix1) == 0) 

120 return x; /* sqrt(+0) = +0 */ 

121 else if (ix0 < 0) 

122 return (x  x) / (x  x); /* sqrt(ve) = sNaN */ 

123 } 

124 /* normalize x */ 

125 m = (ix0 >> 20); 

126 if (m == 0) { /* subnormal x */ 

127 while (ix0 == 0) { 

128 m = 21; 

129 ix0 = (ix1 >> 11); 

130 ix1 <<= 21; 

131 } 

132 for (i = 0; (ix0 & 0x00100000) == 0; i++) 

133 ix0 <<= 1; 

134 m = i  1; 

135 ix0 = (ix1 >> (32  i)); 

136 ix1 <<= i; 

137 } 

138 m = 1023; /* unbias exponent */ 

139 ix0 = (ix0 & 0x000fffff)  0x00100000; 

140 if (m & 1) { /* odd m, double x to make it even */ 

141 ix0 += ix0 + ((ix1 & sign) >> 31); 

142 ix1 += ix1; 

143 } 

144 m >>= 1; /* m = [m/2] */ 

145 

146 /* generate sqrt(x) bit by bit */ 

147 ix0 += ix0 + ((ix1 & sign) >> 31); 

148 ix1 += ix1; 

149 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 

150 r = 0x00200000; /* r = moving bit from right to left */ 

151 

152 while (r != 0) { 

153 t = s0 + r; 

154 if (t <= ix0) { 

155 s0 = t + r; 

156 ix0 = t; 

157 q += r; 

158 } 

159 ix0 += ix0 + ((ix1 & sign) >> 31); 

160 ix1 += ix1; 

161 r >>= 1; 

162 } 

163 

164 r = sign; 

165 while (r != 0) { 

166 t1 = s1 + r; 

167 t = s0; 

168 if ((t < ix0)  ((t == ix0) && (t1 <= ix1))) { 

169 s1 = t1 + r; 

170 if (((t1 & sign) == sign) && (s1 & sign) == 0) 

171 s0 += 1; 

172 ix0 = t; 

173 if (ix1 < t1) 

174 ix0 = 1; 

175 ix1 = t1; 

176 q1 += r; 

177 } 

178 ix0 += ix0 + ((ix1 & sign) >> 31); 

179 ix1 += ix1; 

180 r >>= 1; 

181 } 

182 

183 /* use floating add to find out rounding direction */ 

184 if ((ix0  ix1) != 0) { 

185 z = one  tiny; /* trigger inexact flag */ 

186 if (z >= one) { 

187 z = one + tiny; 

188 if (q1 == (u_int32_t) 0xffffffff) { 

189 q1 = 0; 

190 q += 1; 

191 } else if (z > one) { 

192 if (q1 == (u_int32_t) 0xfffffffe) 

193 q += 1; 

194 q1 += 2; 

195 } else 

196 q1 += (q1 & 1); 

197 } 

198 } 

199 ix0 = (q >> 1) + 0x3fe00000; 

200 ix1 = q1 >> 1; 

201 if ((q & 1) == 1) 

202 ix1 = sign; 

203 ix0 += (m << 20); 

204 INSERT_WORDS(z, ix0, ix1); 

205 return z; 

206 } 

207 

208 /* 

209 Other methods (use floatingpoint arithmetic) 

210  

211 (This is a copy of a drafted paper by Prof W. Kahan 

212 and K.C. Ng, written in May, 1986) 

213 

214 Two algorithms are given here to implement sqrt(x) 

215 (IEEE double precision arithmetic) in software. 

216 Both supply sqrt(x) correctly rounded. The first algorithm (in 

217 Section A) uses newton iterations and involves four divisions. 

218 The second one uses reciproot iterations to avoid division, but 

219 requires more multiplications. Both algorithms need the ability 

220 to chop results of arithmetic operations instead of round them, 

221 and the INEXACT flag to indicate when an arithmetic operation 

222 is executed exactly with no roundoff error, all part of the 

223 standard (IEEE 7541985). The ability to perform shift, add, 

224 subtract and logical AND operations upon 32bit words is needed 

225 too, though not part of the standard. 

226 

227 A. sqrt(x) by Newton Iteration 

228 

229 (1) Initial approximation 

230 

231 Let x0 and x1 be the leading and the trailing 32bit words of 

232 a floating point number x (in IEEE double format) respectively 

233 

234 1 11 52 ...widths 

235  

236 x: s e  f  

237  

238 msb lsb msb lsb ...order 

239 

240 

241   

242 x0: s e  f1  x1:  f2  

243   

244 

245 By performing shifts and subtracts on x0 and x1 (both regarded 

246 as integers), we obtain an 8bit approximation of sqrt(x) as 

247 follows. 

248 

249 k := (x0>>1) + 0x1ff80000; 

250 y0 := k  T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 

251 Here k is a 32bit integer and T1[] is an integer array containing 

252 correction terms. Now magically the floating value of y (y's 

253 leading 32bit word is y0, the value of its trailing word is 0) 

254 approximates sqrt(x) to almost 8bit. 

255 

256 Value of T1: 

257 static int T1[32]= { 

258 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 

259 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 

260 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 

261 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 

262 

263 (2) Iterative refinement 

264 

265 Apply Heron's rule three times to y, we have y approximates 

266 sqrt(x) to within 1 ulp (Unit in the Last Place): 

267 

268 y := (y+x/y)/2 ... almost 17 sig. bits 

269 y := (y+x/y)/2 ... almost 35 sig. bits 

270 y := y(yx/y)/2 ... within 1 ulp 

271 

272 

273 Remark 1. 

274 Another way to improve y to within 1 ulp is: 

275 

276 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 

277 y := y  0x00100006 ... almost 18 sig. bits to sqrt(x) 

278 

279 2 

280 (xy )*y 

281 y := y + 2*  ...within 1 ulp 

282 2 

283 3y + x 

284 

285 

286 This formula has one division fewer than the one above; however, 

287 it requires more multiplications and additions. Also x must be 

288 scaled in advance to avoid spurious overflow in evaluating the 

289 expression 3y*y+x. Hence it is not recommended uless division 

290 is slow. If division is very slow, then one should use the 

291 reciproot algorithm given in section B. 

292 

293 (3) Final adjustment 

294 

295 By twiddling y's last bit it is possible to force y to be 

296 correctly rounded according to the prevailing rounding mode 

297 as follows. Let r and i be copies of the rounding mode and 

298 inexact flag before entering the square root program. Also we 

299 use the expression y+ulp for the next representable floating 

300 numbers (up and down) of y. Note that y+ulp = either fixed 

301 point y+1, or multiply y by nextafter(1,+inf) in chopped 

302 mode. 

303 

304 I := FALSE; ... reset INEXACT flag I 

305 R := RZ; ... set rounding mode to roundtowardzero 

306 z := x/y; ... chopped quotient, possibly inexact 

307 If(not I) then { ... if the quotient is exact 

308 if(z=y) { 

309 I := i; ... restore inexact flag 

310 R := r; ... restore rounded mode 

311 return sqrt(x):=y. 

312 } else { 

313 z := z  ulp; ... special rounding 

314 } 

315 } 

316 i := TRUE; ... sqrt(x) is inexact 

317 If (r=RN) then z=z+ulp ... roundedtonearest 

318 If (r=RP) then { ... roundtoward+inf 

319 y = y+ulp; z=z+ulp; 

320 } 

321 y := y+z; ... chopped sum 

322 y0:=y00x00100000; ... y := y/2 is correctly rounded. 

323 I := i; ... restore inexact flag 

324 R := r; ... restore rounded mode 

325 return sqrt(x):=y. 

326 

327 (4) Special cases 

328 

329 Square root of +inf, +0, or NaN is itself; 

330 Square root of a negative number is NaN with invalid signal. 

331 

332 

333 B. sqrt(x) by Reciproot Iteration 

334 

335 (1) Initial approximation 

336 

337 Let x0 and x1 be the leading and the trailing 32bit words of 

338 a floating point number x (in IEEE double format) respectively 

339 (see section A). By performing shifs and subtracts on x0 and y0, 

340 we obtain a 7.8bit approximation of 1/sqrt(x) as follows. 

341 

342 k := 0x5fe80000  (x0>>1); 

343 y0:= k  T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 

344 

345 Here k is a 32bit integer and T2[] is an integer array 

346 containing correction terms. Now magically the floating 

347 value of y (y's leading 32bit word is y0, the value of 

348 its trailing word y1 is set to zero) approximates 1/sqrt(x) 

349 to almost 7.8bit. 

350 

351 Value of T2: 

352 static int T2[64]= { 

353 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 

354 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 

355 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 

356 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 

357 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 

358 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 

359 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 

360 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 

361 

362 (2) Iterative refinement 

363 

364 Apply Reciproot iteration three times to y and multiply the 

365 result by x to get an approximation z that matches sqrt(x) 

366 to about 1 ulp. To be exact, we will have 

367 1ulp < sqrt(x)z<1.0625ulp. 

368 

369 ... set rounding mode to Roundtonearest 

370 y := y*(1.50.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 

371 y := y*((1.52^30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 

372 ... special arrangement for better accuracy 

373 z := x*y ... 29 bits to sqrt(x), with z*y<1 

374 z := z + 0.5*z*(1z*y) ... about 1 ulp to sqrt(x) 

375 

376 Remark 2. The constant 1.52^30 is chosen to bias the error so that 

377 (a) the term z*y in the final iteration is always less than 1; 

378 (b) the error in the final result is biased upward so that 

379 1 ulp < sqrt(x)  z < 1.0625 ulp 

380 instead of sqrt(x)z<1.03125ulp. 

381 

382 (3) Final adjustment 

383 

384 By twiddling y's last bit it is possible to force y to be 

385 correctly rounded according to the prevailing rounding mode 

386 as follows. Let r and i be copies of the rounding mode and 

387 inexact flag before entering the square root program. Also we 

388 use the expression y+ulp for the next representable floating 

389 numbers (up and down) of y. Note that y+ulp = either fixed 

390 point y+1, or multiply y by nextafter(1,+inf) in chopped 

391 mode. 

392 

393 R := RZ; ... set rounding mode to roundtowardzero 

394 switch(r) { 

395 case RN: ... roundtonearest 

396 if(x<= z*(zulp)...chopped) z = z  ulp; else 

397 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 

398 break; 

399 case RZ:case RM: ... roundtozero or roundtoinf 

400 R:=RP; ... reset rounding mod to roundto+inf 

401 if(x<z*z ... rounded up) z = z  ulp; else 

402 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 

403 break; 

404 case RP: ... roundto+inf 

405 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 

406 if(x>z*z ...chopped) z = z+ulp; 

407 break; 

408 } 

409 

410 Remark 3. The above comparisons can be done in fixed point. For 

411 example, to compare x and w=z*z chopped, it suffices to compare 

412 x1 and w1 (the trailing parts of x and w), regarding them as 

413 two's complement integers. 

414 

415 ...Is z an exact square root? 

416 To determine whether z is an exact square root of x, let z1 be the 

417 trailing part of z, and also let x0 and x1 be the leading and 

418 trailing parts of x. 

419 

420 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 

421 I := 1; ... Raise Inexact flag: z is not exact 

422 else { 

423 j := 1  [(x0>>20)&1] ... j = logb(x) mod 2 

424 k := z1 >> 26; ... get z's 25th and 26th 

425 fraction bits 

426 I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 

427 } 

428 R:= r ... restore rounded mode 

429 return sqrt(x):=z. 

430 

431 If multiplication is cheaper then the foregoing red tape, the 

432 Inexact flag can be evaluated by 

433 

434 I := i; 

435 I := (z*z!=x) or I. 

436 

437 Note that z*z can overwrite I; this value must be sensed if it is 

438 True. 

439 

440 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 

441 zero. 

442 

443  

444 z1:  f2  

445  

446 bit 31 bit 0 

447 

448 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 

449 or even of logb(x) have the following relations: 

450 

451  

452 bit 27,26 of z1 bit 1,0 of x1 logb(x) 

453  

454 00 00 odd and even 

455 01 01 even 

456 10 10 odd 

457 10 00 even 

458 11 01 even 

459  

460 

461 (4) Special cases (see (4) of Section A). 

462 

463 */ 