2757

1 
/* @(#)k_rem_pio2.c 5.1 93/09/24 */


2 
/*


3 
* ====================================================


4 
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.


5 
*


6 
* Developed at SunPro, a Sun Microsystems, Inc. business.


7 
* Permission to use, copy, modify, and distribute this


8 
* software is freely granted, provided that this notice


9 
* is preserved.


10 
* ====================================================


11 
*/


12 


13 
#if defined(LIBM_SCCS) && !defined(lint)


14 
static char rcsid[] =


15 
"$NetBSD: k_rem_pio2.c,v 1.7 1995/05/10 20:46:25 jtc Exp $";


16 
#endif


17 


18 
/*


19 
* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)


20 
* double x[],y[]; int e0,nx,prec; int ipio2[];


21 
*


22 
* __kernel_rem_pio2 return the last three digits of N with


23 
* y = x  N*pi/2


24 
* so that y < pi/2.


25 
*


26 
* The method is to compute the integer (mod 8) and fraction parts of


27 
* (2/pi)*x without doing the full multiplication. In general we


28 
* skip the part of the product that are known to be a huge integer (


29 
* more accurately, = 0 mod 8 ). Thus the number of operations are


30 
* independent of the exponent of the input.


31 
*


32 
* (2/pi) is represented by an array of 24bit integers in ipio2[].


33 
*


34 
* Input parameters:


35 
* x[] The input value (must be positive) is broken into nx


36 
* pieces of 24bit integers in double precision format.


37 
* x[i] will be the ith 24 bit of x. The scaled exponent


38 
* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0


39 
* match x's up to 24 bits.


40 
*


41 
* Example of breaking a double positive z into x[0]+x[1]+x[2]:


42 
* e0 = ilogb(z)23


43 
* z = scalbn(z,e0)


44 
* for i = 0,1,2


45 
* x[i] = floor(z)


46 
* z = (zx[i])*2**24


47 
*


48 
*


49 
* y[] ouput result in an array of double precision numbers.


50 
* The dimension of y[] is:


51 
* 24bit precision 1


52 
* 53bit precision 2


53 
* 64bit precision 2


54 
* 113bit precision 3


55 
* The actual value is the sum of them. Thus for 113bit


56 
* precison, one may have to do something like:


57 
*


58 
* long double t,w,r_head, r_tail;


59 
* t = (long double)y[2] + (long double)y[1];


60 
* w = (long double)y[0];


61 
* r_head = t+w;


62 
* r_tail = w  (r_head  t);


63 
*


64 
* e0 The exponent of x[0]


65 
*


66 
* nx dimension of x[]


67 
*


68 
* prec an integer indicating the precision:


69 
* 0 24 bits (single)


70 
* 1 53 bits (double)


71 
* 2 64 bits (extended)


72 
* 3 113 bits (quad)


73 
*


74 
* ipio2[]


75 
* integer array, contains the (24*i)th to (24*i+23)th


76 
* bit of 2/pi after binary point. The corresponding


77 
* floating value is


78 
*


79 
* ipio2[i] * 2^(24(i+1)).


80 
*


81 
* External function:


82 
* double scalbn(), floor();


83 
*


84 
*


85 
* Here is the description of some local variables:


86 
*


87 
* jk jk+1 is the initial number of terms of ipio2[] needed


88 
* in the computation. The recommended value is 2,3,4,


89 
* 6 for single, double, extended,and quad.


90 
*


91 
* jz local integer variable indicating the number of


92 
* terms of ipio2[] used.


93 
*


94 
* jx nx  1


95 
*


96 
* jv index for pointing to the suitable ipio2[] for the


97 
* computation. In general, we want


98 
* ( 2^e0*x[0] * ipio2[jv1]*2^(24jv) )/8


99 
* is an integer. Thus


100 
* e0324*jv >= 0 or (e03)/24 >= jv


101 
* Hence jv = max(0,(e03)/24).


102 
*


103 
* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.


104 
*


105 
* q[] double array with integral value, representing the


106 
* 24bits chunk of the product of x and 2/pi.


107 
*


108 
* q0 the corresponding exponent of q[0]. Note that the


109 
* exponent for q[i] would be q024*i.


110 
*


111 
* PIo2[] double precision array, obtained by cutting pi/2


112 
* into 24 bits chunks.


113 
*


114 
* f[] ipio2[] in floating point


115 
*


116 
* iq[] integer array by breaking up q[] in 24bits chunk.


117 
*


118 
* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]


119 
*


120 
* ih integer. If >0 it indicates q[] is >= 0.5, hence


121 
* it also indicates the *sign* of the result.


122 
*


123 
*/


124 


125 


126 
/*


127 
* Constants:


128 
* The hexadecimal values are the intended ones for the following


129 
* constants. The decimal values may be used, provided that the


130 
* compiler will convert from decimal to binary accurately enough


131 
* to produce the hexadecimal values shown.


132 
*/


133 


134 
#include "math.h"


135 
#include "math_private.h"


136 


137 
libm_hidden_proto(scalbn)


138 
libm_hidden_proto(floor)


139 
#ifdef __STDC__


140 
static const int init_jk[] = { 2, 3, 4, 6 }; /* initial value for jk */


141 
#else


142 
static int init_jk[] = { 2, 3, 4, 6 };


143 
#endif


144 


145 
#ifdef __STDC__


146 
static const double PIo2[] = {


147 
#else


148 
static double PIo2[] = {


149 
#endif


150 
1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */


151 
7.54978941586159635335e08, /* 0x3E74442D, 0x00000000 */


152 
5.39030252995776476554e15, /* 0x3CF84698, 0x80000000 */


153 
3.28200341580791294123e22, /* 0x3B78CC51, 0x60000000 */


154 
1.27065575308067607349e29, /* 0x39F01B83, 0x80000000 */


155 
1.22933308981111328932e36, /* 0x387A2520, 0x40000000 */


156 
2.73370053816464559624e44, /* 0x36E38222, 0x80000000 */


157 
2.16741683877804819444e51, /* 0x3569F31D, 0x00000000 */


158 
};


159 


160 
#ifdef __STDC__


161 
static const double


162 
#else


163 
static double


164 
#endif


165 
zero = 0.0, one = 1.0, two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */


166 
twon24 = 5.96046447753906250000e08; /* 0x3E700000, 0x00000000 */


167 


168 
#ifdef __STDC__


169 
int attribute_hidden


170 
__kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,


171 
const int32_t * ipio2)


172 
#else


173 
int attribute_hidden


174 
__kernel_rem_pio2(x, y, e0, nx, prec, ipio2)


175 
double x[], y[];


176 
int e0, nx, prec;


177 
int32_t ipio2[];


178 
#endif


179 
{


180 
int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;


181 
double z, fw, f[20], fq[20], q[20];


182 


183 
/* initialize jk */


184 
jk = init_jk[prec];


185 
jp = jk;


186 


187 
/* determine jx,jv,q0, note that 3>q0 */


188 
jx = nx  1;


189 
jv = (e0  3) / 24;


190 
if (jv < 0)


191 
jv = 0;


192 
q0 = e0  24 * (jv + 1);


193 


194 
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */


195 
j = jv  jx;


196 
m = jx + jk;


197 
for (i = 0; i <= m; i++, j++)


198 
f[i] = (j < 0) ? zero : (double) ipio2[j];


199 


200 
/* compute q[0],q[1],...q[jk] */


201 
for (i = 0; i <= jk; i++) {


202 
for (j = 0, fw = 0.0; j <= jx; j++)


203 
fw += x[j] * f[jx + i  j];


204 
q[i] = fw;


205 
}


206 


207 
jz = jk;


208 
recompute:


209 
/* distill q[] into iq[] reversingly */


210 
for (i = 0, j = jz, z = q[jz]; j > 0; i++, j) {


211 
fw = (double) ((int32_t) (twon24 * z));


212 
iq[i] = (int32_t) (z  two24 * fw);


213 
z = q[j  1] + fw;


214 
}


215 


216 
/* compute n */


217 
z = scalbn(z, q0); /* actual value of z */


218 
z = 8.0 * floor(z * 0.125); /* trim off integer >= 8 */


219 
n = (int32_t) z;


220 
z = (double) n;


221 
ih = 0;


222 
if (q0 > 0) { /* need iq[jz1] to determine n */


223 
i = (iq[jz  1] >> (24  q0));


224 
n += i;


225 
iq[jz  1] = i << (24  q0);


226 
ih = iq[jz  1] >> (23  q0);


227 
} else if (q0 == 0)


228 
ih = iq[jz  1] >> 23;


229 
else if (z >= 0.5)


230 
ih = 2;


231 


232 
if (ih > 0) { /* q > 0.5 */


233 
n += 1;


234 
carry = 0;


235 
for (i = 0; i < jz; i++) { /* compute 1q */


236 
j = iq[i];


237 
if (carry == 0) {


238 
if (j != 0) {


239 
carry = 1;


240 
iq[i] = 0x1000000  j;


241 
}


242 
} else


243 
iq[i] = 0xffffff  j;


244 
}


245 
if (q0 > 0) { /* rare case: chance is 1 in 12 */


246 
switch (q0) {


247 
case 1:


248 
iq[jz  1] &= 0x7fffff;


249 
break;


250 
case 2:


251 
iq[jz  1] &= 0x3fffff;


252 
break;


253 
}


254 
}


255 
if (ih == 2) {


256 
z = one  z;


257 
if (carry != 0)


258 
z = scalbn(one, q0);


259 
}


260 
}


261 


262 
/* check if recomputation is needed */


263 
if (z == zero) {


264 
j = 0;


265 
for (i = jz  1; i >= jk; i)


266 
j = iq[i];


267 
if (j == 0) { /* need recomputation */


268 
for (k = 1; iq[jk  k] == 0; k++); /* k = no. of terms needed */


269 


270 
for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */


271 
f[jx + i] = (double) ipio2[jv + i];


272 
for (j = 0, fw = 0.0; j <= jx; j++)


273 
fw += x[j] * f[jx + i  j];


274 
q[i] = fw;


275 
}


276 
jz += k;


277 
goto recompute;


278 
}


279 
}


280 


281 
/* chop off zero terms */


282 
if (z == 0.0) {


283 
jz = 1;


284 
q0 = 24;


285 
while (iq[jz] == 0) {


286 
jz;


287 
q0 = 24;


288 
}


289 
} else { /* break z into 24bit if necessary */


290 
z = scalbn(z, q0);


291 
if (z >= two24) {


292 
fw = (double) ((int32_t) (twon24 * z));


293 
iq[jz] = (int32_t) (z  two24 * fw);


294 
jz += 1;


295 
q0 += 24;


296 
iq[jz] = (int32_t) fw;


297 
} else


298 
iq[jz] = (int32_t) z;


299 
}


300 


301 
/* convert integer "bit" chunk to floatingpoint value */


302 
fw = scalbn(one, q0);


303 
for (i = jz; i >= 0; i) {


304 
q[i] = fw * (double) iq[i];


305 
fw *= twon24;


306 
}


307 


308 
/* compute PIo2[0,...,jp]*q[jz,...,0] */


309 
for (i = jz; i >= 0; i) {


310 
for (fw = 0.0, k = 0; k <= jp && k <= jz  i; k++)


311 
fw += PIo2[k] * q[i + k];


312 
fq[jz  i] = fw;


313 
}


314 


315 
/* compress fq[] into y[] */


316 
switch (prec) {


317 
case 0:


318 
fw = 0.0;


319 
for (i = jz; i >= 0; i)


320 
fw += fq[i];


321 
y[0] = (ih == 0) ? fw : fw;


322 
break;


323 
case 1:


324 
case 2:


325 
fw = 0.0;


326 
for (i = jz; i >= 0; i)


327 
fw += fq[i];


328 
y[0] = (ih == 0) ? fw : fw;


329 
fw = fq[0]  fw;


330 
for (i = 1; i <= jz; i++)


331 
fw += fq[i];


332 
y[1] = (ih == 0) ? fw : fw;


333 
break;


334 
case 3: /* painful */


335 
for (i = jz; i > 0; i) {


336 
fw = fq[i  1] + fq[i];


337 
fq[i] += fq[i  1]  fw;


338 
fq[i  1] = fw;


339 
}


340 
for (i = jz; i > 1; i) {


341 
fw = fq[i  1] + fq[i];


342 
fq[i] += fq[i  1]  fw;


343 
fq[i  1] = fw;


344 
}


345 
for (fw = 0.0, i = jz; i >= 2; i)


346 
fw += fq[i];


347 
if (ih == 0) {


348 
y[0] = fq[0];


349 
y[1] = fq[1];


350 
y[2] = fw;


351 
} else {


352 
y[0] = fq[0];


353 
y[1] = fq[1];


354 
y[2] = fw;


355 
}


356 
}


357 
return n & 7;


358 
}
