Classes | |
union | ieee754 |
Used to switch between different type of interpretations of the data (64 bits) More... | |
Functions | |
VDT_FORCE_INLINE unsigned long long | d2ll (double x) |
Converts a double to an unsigned long long. | |
VDT_FORCE_INLINE double | fast_acos (double x) |
VDT_FORCE_INLINE double | fast_approx_inv (double x) |
VDT_FORCE_INLINE double | fast_approx_isqrt (double x) |
VDT_FORCE_INLINE double | fast_asin (double x) |
VDT_FORCE_INLINE double | fast_atan (double x) |
VDT_FORCE_INLINE double | fast_cos (double x) |
Cos defined between -2pi and 2pi. | |
VDT_FORCE_INLINE double | fast_exp (double x) |
Exponential Function. | |
void | fast_exp_vect_46 (double const *input, double *output, const unsigned int arr_size) |
Some tweaks to make it vectorise with gcc46. | |
VDT_FORCE_INLINE double | fast_inv (double x) |
VDT_FORCE_INLINE double | fast_isqrt (double x) |
VDT_FORCE_INLINE double | fast_isqrt_general (double x, const unsigned short ISQRT_ITERATIONS) |
VDT_FORCE_INLINE double | fast_log (double x) |
void | fast_log_vect_46 (double const *input, double *output, const unsigned int arr_size) |
Some tweaks to make it vectorise with gcc46. | |
VDT_FORCE_INLINE double | fast_sin (double x) |
Sin defined between -2pi and 2pi. | |
VDT_FORCE_INLINE double | fast_tan (double x) |
Sin defined between -2pi and 2pi. | |
VDT_FORCE_INLINE double | getMantExponent (double x, double &fe) |
Like frexp but vectorising and the exponent is a double. | |
VDT_FORCE_INLINE double | ll2d (unsigned long long x) |
Converts an unsigned long long to a double. | |
void | print_instructions_info () |
Print the instructions used on screen. | |
VDT_FORCE_INLINE double | std_inv (double x) |
VDT_FORCE_INLINE void | std_inv_vect (double const *VDT_RESTRICT input, double *VDT_RESTRICT output, const unsigned int arr_size) CMS_VECTORIZE_VERBOSE |
VDT_FORCE_INLINE double | std_isqrt (double x) |
VDT_FORCE_INLINE void | std_isqrt_vect (double const *VDT_RESTRICT input, double *VDT_RESTRICT output, const unsigned int arr_size) CMS_VECTORIZE_VERBOSE |
Variables | |
constexpr double | ATAN_LIMIT = 1e307 |
constexpr double | C1cos = -1.13585365213876817300E-11 |
constexpr double | C1sin = 1.58962301576546568060E-10 |
constexpr double | C2cos = 2.08757008419747316778E-9 |
constexpr double | C2sin = -2.50507477628578072866E-8 |
constexpr double | C3cos = -2.75573141792967388112E-7 |
constexpr double | C3sin = 2.75573136213857245213E-6 |
constexpr double | C4cos = 2.48015872888517045348E-5 |
constexpr double | C4sin = -1.98412698295895385996E-4 |
constexpr double | C5cos = -1.38888888888730564116E-3 |
constexpr double | C5sin = 8.33333333332211858878E-3 |
constexpr double | C6cos = 4.16666666666665929218E-2 |
constexpr double | C6sin = -1.66666666666666307295E-1 |
constexpr double | DP1sc = 7.85398125648498535156E-1 |
constexpr double | DP1tan = 7.853981554508209228515625E-1 |
constexpr double | DP2sc = 3.77489470793079817668E-8 |
constexpr double | DP2tan = 7.94662735614792836714E-9 |
constexpr double | DP3sc = 2.69515142907905952645E-15 |
constexpr double | DP3tan = 3.06161699786838294307E-17 |
constexpr double | EXP_LIMIT = 708. |
constexpr double | LOG2E = 1.4426950408889634073599 |
constexpr double | LOG_LOWER_LIMIT = 1e-307 |
constexpr double | LOG_UPPER_LIMIT = 1e307 |
constexpr double | MOREBITS = 6.123233995736765886130E-17 |
constexpr double | MOREBITSO2 = MOREBITS/2. |
constexpr double | PI = M_PI |
constexpr double | PIO2 = M_PI_2 |
constexpr double | PIO4 = M_PI_4 |
constexpr double | PX1asin = 4.253011369004428248960E-3 |
constexpr double | PX1atan = -8.750608600031904122785E-1 |
constexpr double | PX1exp = 1.26177193074810590878E-4 |
constexpr double | PX1log = 1.01875663804580931796E-4 |
constexpr double | PX1tan = -1.30936939181383777646E4 |
constexpr double | PX2asin = -6.019598008014123785661E-1 |
constexpr double | PX2atan = -1.615753718733365076637E1 |
constexpr double | PX2exp = 3.02994407707441961300E-2 |
constexpr double | PX2log = 4.97494994976747001425E-1 |
constexpr double | PX2tan = 1.15351664838587416140E6 |
constexpr double | PX3asin = 5.444622390564711410273E0 |
constexpr double | PX3atan = -7.500855792314704667340E1 |
constexpr double | PX3exp = 9.99999999999999999910E-1 |
constexpr double | PX3log = 4.70579119878881725854E0 |
constexpr double | PX3tan = -1.79565251976484877988E7 |
constexpr double | PX4asin = -1.626247967210700244449E1 |
constexpr double | PX4atan = -1.228866684490136173410E2 |
constexpr double | PX4log = 1.44989225341610930846E1 |
constexpr double | PX5asin = 1.956261983317594739197E1 |
constexpr double | PX5atan = -6.485021904942025371773E1 |
constexpr double | PX5log = 1.79368678507819816313E1 |
constexpr double | PX6asin = -8.198089802484824371615E0 |
constexpr double | PX6log = 7.70838733755885391666E0 |
constexpr double | QX1asin = -1.474091372988853791896E1 |
constexpr double | QX1atan = - 2.485846490142306297962E1 |
constexpr double | QX1exp = 3.00198505138664455042E-6 |
constexpr double | QX1log = 1.12873587189167450590E1 |
constexpr double | QX1tan = 1.36812963470692954678E4 |
constexpr double | QX2asin = 7.049610280856842141659E1 |
constexpr double | QX2atan = 1.650270098316988542046E2 |
constexpr double | QX2exp = 2.52448340349684104192E-3 |
constexpr double | QX2log = 4.52279145837532221105E1 |
constexpr double | QX2tan = -1.32089234440210967447E6 |
constexpr double | QX3asin = -1.471791292232726029859E2 |
constexpr double | QX3atan = 4.328810604912902668951E2 |
constexpr double | QX3exp = 2.27265548208155028766E-1 |
constexpr double | QX3log = 8.29875266912776603211E1 |
constexpr double | QX3tan = 2.50083801823357915839E7 |
constexpr double | QX4asin = 1.395105614657485689735E2 |
constexpr double | QX4atan = 4.853903996359136964868E2 |
constexpr double | QX4exp = 2.00000000000000000009E0 |
constexpr double | QX4log = 7.11544750618563894466E1 |
constexpr double | QX4tan = -5.38695755929454629881E7 |
constexpr double | QX5asin = -4.918853881490881290097E1 |
constexpr double | QX5atan = 1.945506571482613964425E2 |
constexpr double | QX5log = 2.31251620126765340583E1 |
constexpr double | RX1asin = 2.967721961301243206100E-3 |
constexpr double | RX2asin = -5.634242780008963776856E-1 |
constexpr double | RX3asin = 6.968710824104713396794E0 |
constexpr double | RX4asin = -2.556901049652824852289E1 |
constexpr double | RX5asin = 2.853665548261061424989E1 |
constexpr double | SIN_LOWER_LIMIT = -SIN_UPPER_LIMIT |
constexpr double | SIN_UPPER_LIMIT = TWOPI |
constexpr double | SQRT_LIMIT = 1e307 |
constexpr double | SQRTH = 0.70710678118654752440 |
constexpr double | SX1asin = -2.194779531642920639778E1 |
constexpr double | SX2asin = 1.470656354026814941758E2 |
constexpr double | SX3asin = -3.838770957603691357202E2 |
constexpr double | SX4asin = 3.424398657913078477438E2 |
constexpr double | T3PO8 = 2.41421356237309504880 |
constexpr double | TAN_LIMIT = TWOPI |
constexpr double | TWOPI = 2.*M_PI |
VDT_FORCE_INLINE unsigned long long vdt::d2ll | ( | double | x | ) |
Converts a double to an unsigned long long.
Definition at line 188 of file VDTMath.h.
References vdt::ieee754::d, vdt::ieee754::ll, tmp, and x.
Referenced by fast_isqrt_general(), and getMantExponent().
VDT_FORCE_INLINE double vdt::fast_acos | ( | double | x | ) |
Definition at line 564 of file VDTMath.h.
References fast_asin(), mathSSE::sqrt(), and z.
VDT_FORCE_INLINE double vdt::fast_approx_inv | ( | double | x | ) |
Definition at line 786 of file VDTMath.h.
References fast_approx_isqrt(), x, and detailsBasic3DVector::y.
VDT_FORCE_INLINE double vdt::fast_approx_isqrt | ( | double | x | ) |
Definition at line 768 of file VDTMath.h.
References fast_isqrt_general().
Referenced by fast_approx_inv().
{return fast_isqrt_general(x,3);}
VDT_FORCE_INLINE double vdt::fast_asin | ( | double | x | ) |
Definition at line 413 of file VDTMath.h.
References a, alignCSCRings::e, MOREBITS, AlCaHLTBitMon_ParallelJobs::p, PIO4, PX1asin, PX2asin, PX3asin, PX4asin, PX5asin, PX6asin, QX1asin, QX2asin, QX3asin, QX4asin, QX5asin, RX1asin, RX2asin, RX3asin, RX4asin, RX5asin, mathSSE::sqrt(), SX1asin, SX2asin, SX3asin, SX4asin, x, and z.
Referenced by fast_acos().
{ int sign=1; double a = x; //necessary for linear approx if ( x < 0. ){ sign *= -1; a *= -1; } double p, z, zz; double px,qx; /* arcsin(1-x) = pi/2 - sqrt(2x)(1+R(x)) */ zz = 1.0 - a; px = RX1asin; px*= zz; px+= RX2asin; px*= zz; px+= RX3asin; px*= zz; px+= RX4asin; px*= zz; px+= RX5asin; qx = zz; qx+= SX1asin; qx*= zz; qx+= SX2asin; qx*= zz; qx+= SX3asin; qx*= zz; qx+= SX4asin; p =zz* px/qx; // p = zz * polevl( zz, R, 4)/p1evl( zz, S, 4); zz = std::sqrt(zz+zz); z = PIO4 - zz; zz = zz * p - MOREBITS; z -= zz; z += PIO4; if( a < 0.625 ){ zz = a * a; px = PX1asin; px*= zz; px+= PX2asin; px*= zz; px+= PX3asin; px*= zz; px+= PX4asin; px*= zz; px+= PX5asin; px*= zz; px+= PX6asin; qx = zz; qx+= QX1asin; qx*= zz; qx+= QX2asin; qx*= zz; qx+= QX3asin; qx*= zz; qx+= QX4asin; qx*= zz; qx+= QX5asin; z = zz*px/qx; z = a * z + a; } z *= sign; //linear approx, not sooo needed but seable. Price is cheap though if( a < 1.0e-8 ) z = a; return z; }
VDT_FORCE_INLINE double vdt::fast_atan | ( | double | x | ) |
Definition at line 664 of file VDTMath.h.
References MOREBITS, MOREBITSO2, PIO2, PIO4, PX1atan, PX2atan, PX3atan, PX4atan, PX5atan, QX1atan, QX2atan, QX3atan, QX4atan, QX5atan, T3PO8, x, detailsBasic3DVector::y, and z.
{ /* make argument positive and save the sign */ int sign = 1; if( x < 0.0 ) { x = - x; sign = -1; } /* range reduction */ double originalx=x; // This is slower! // double y = 0.0; // double factor = 0.; // // if (x > .66){ // y = PIO4; // factor = MOREBITSO2; // x = (x-1.0) / (x+1.0); // } // if( originalx > T3PO8 ) { // y = PIO2; // factor = MOREBITS; // x = -1.0 / originalx ; // } double y = PIO4; double factor = MOREBITSO2; x = (x-1.0) / (x+1.0); if( originalx > T3PO8 ) { y = PIO2; //flag = 1.; factor = MOREBITS; x = -1.0 / originalx ; } if ( originalx <= 0.66 ) { y = 0.0; x = originalx; //flag = 0.; factor = 0.; } double z = x * x; double px = PX1atan; px *= z; px += PX2atan; px *= z; px += PX3atan; px *= z; px += PX4atan; px *= z; px += PX5atan; px *= z; // for the final formula double qx=z; qx += QX1atan; qx *=z; qx += QX2atan; qx *=z; qx += QX3atan; qx *=z; qx += QX4atan; qx *=z; qx += QX5atan; // z = px / qx; // z = x * px / qx + x; y = y +x * px / qx + x +factor; y = sign * y; return y; }
VDT_FORCE_INLINE double vdt::fast_cos | ( | double | x | ) |
Cos defined between -2pi and 2pi.
Definition at line 501 of file VDTMath.h.
References abs, C1cos, C1sin, C2cos, C2sin, C3cos, C3sin, C4cos, C4sin, C5cos, C5sin, C6cos, C6sin, DP1sc, DP2sc, DP3sc, j, PI, PIO2, PIO4, TWOPI, x, detailsBasic3DVector::y, and z.
{ x = std::abs(x); if( x > PI ) x = TWOPI - x ; int sign = 1; if( x > PIO2 ){ x = PI - x; sign=-1; } double y = int( x/PIO4 ); // integer part of x/PIO4 int j=0; if (x>PIO4){ j=2; y+=1; sign = -sign; } /* Extended precision modular arithmetic */ double z = ((x - y * DP1sc) - y * DP2sc) - y * DP3sc; double zz = z * z; double px=0; if( j==2 ){ px = C1sin; px *= zz; px += C2sin; px *= zz; px += C3sin; px *= zz; px += C4sin; px *= zz; px += C5sin; px *= zz; px += C6sin; y = z + z * zz * px; } else{ px = C1cos; px *= zz; px += C2cos; px *= zz; px += C3cos; px *= zz; px += C4cos; px *= zz; px += C5cos; px *= zz; px += C6cos; y = 1. - zz * .5 + zz * zz * px; } y *= sign; return y; }
VDT_FORCE_INLINE double vdt::fast_exp | ( | double | x | ) |
Exponential Function.
Definition at line 226 of file VDTMath.h.
References vdt::ieee754::d, EXP_LIMIT, infinity, vdt::ieee754::ll, LOG2E, n, PX1exp, PX2exp, PX3exp, QX1exp, QX2exp, QX3exp, QX4exp, and x.
{ double initial_x = x; // double px =int(LOG2E * x + 0.5); // std::floor(LOG2E * x + 0.5); double px = std::floor(LOG2E * x + 0.5); int n = px; x -= px * 6.93145751953125E-1; x -= px * 1.42860682030941723212E-6; double xx = x * x; // px = x * P(x**2). px = PX1exp; px *= xx; px += PX2exp; px *= xx; px += PX3exp; px *= x; // Evaluate Q(x**2). double qx = QX1exp; qx *= xx; qx += QX2exp; qx *= xx; qx += QX3exp; qx *= xx; qx += QX4exp; // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) x = px / (qx - px); x = 1.0 + 2.0 * x; // Build 2^n in double. ieee754 u; u.d = 0; n += 1023; u.ll = (long long) (n) << 52; double res = x * u.d; if (initial_x > EXP_LIMIT) res = std::numeric_limits<double>::infinity(); if (initial_x < -EXP_LIMIT) res = 0.; return res; }
void vdt::fast_exp_vect_46 | ( | double const * | input, |
double * | output, | ||
const unsigned int | arr_size | ||
) |
Some tweaks to make it vectorise with gcc46.
Exponential Function - some tweaks to have it vectorise in gcc46.
Definition at line 63 of file VDTMath.cc.
References vdt::ieee754::d, EXP_LIMIT, i, infinity, vdt::ieee754::ll, LOG2E, n, PX1exp, PX2exp, PX3exp, QX1exp, QX2exp, QX3exp, QX4exp, and x.
{ // input & output must not point to the same memory location // assert( input != output ); int n; int* nv = new int[arr_size]; double xx, px, qx; ieee754 u; // for vectorisation double x; for (unsigned int i = 0; i < arr_size; ++i) nv[i] = n = std::floor( LOG2E * input[i] + 0.5 ); //Profitability threshold = 7 for (unsigned int i = 0; i < arr_size; ++i) { x = input[i]; //nv[i] = n = int(LOG2E * x + 0.5);//std::floor( LOG2E * x + 0.5 ); n=nv[i]; px = n; x -= px * 6.93145751953125E-1; x -= px * 1.42860682030941723212E-6; xx = x * x; // px = x * P(x**2). px = PX1exp; px *= xx; px += PX2exp; px *= xx; px += PX3exp; px *= x; // Evaluate Q(x**2). qx = QX1exp; qx *= xx; qx += QX2exp; qx *= xx; qx += QX3exp; qx *= xx; qx += QX4exp; // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) x = px / (qx - px); x = 1.0 + 2.0 * x; // partial output[i]=x; } // end loop on input vals //Profitability threshold = 4 for (unsigned int i = 0; i < arr_size; ++i) { // Build 2^n in double. n=nv[i]; u.d = 0; n += 1023; u.ll = (long long) (n) << 52; output[i] = output[i] * u.d; if (input[i] > EXP_LIMIT) output[i] = std::numeric_limits<double>::infinity(); if (input[i] < -EXP_LIMIT) output[i] = 0.; } delete [] nv; }
VDT_FORCE_INLINE double vdt::fast_inv | ( | double | x | ) |
Definition at line 776 of file VDTMath.h.
References fast_isqrt(), x, and detailsBasic3DVector::y.
VDT_FORCE_INLINE double vdt::fast_isqrt | ( | double | x | ) |
Definition at line 765 of file VDTMath.h.
References fast_isqrt_general().
Referenced by fast_inv().
{return fast_isqrt_general(x,4);}
VDT_FORCE_INLINE double vdt::fast_isqrt_general | ( | double | x, |
const unsigned short | ISQRT_ITERATIONS | ||
) |
Definition at line 746 of file VDTMath.h.
References d2ll(), i, j, ll2d(), x, and detailsBasic3DVector::y.
Referenced by fast_approx_isqrt(), and fast_isqrt().
VDT_FORCE_INLINE double vdt::fast_log | ( | double | x | ) |
Definition at line 279 of file VDTMath.h.
References getMantExponent(), infinity, LOG_LOWER_LIMIT, LOG_UPPER_LIMIT, PX1log, PX2log, PX3log, PX4log, PX5log, PX6log, QX1log, QX2log, QX3log, QX4log, QX5log, SQRTH, x, detailsBasic3DVector::y, and z.
{ double input_x=x; /* separate mantissa from exponent */ double fe; x = getMantExponent(x,fe); // blending if( x < SQRTH ) { fe-=1; x += x ; } x -= 1.0; /* rational form */ double z = x*x; double px = PX1log; px *= x; px += PX2log; px *= x; px += PX3log; px *= x; px += PX4log; px *= x; px += PX5log; px *= x; px += PX6log; // //for the final formula px *= x; px *= z; double qx = x; qx += QX1log; qx *=x; qx += QX2log; qx *=x; qx += QX3log; qx *=x; qx += QX4log; qx *=x; qx += QX5log; double y = px / qx ; y -= fe * 2.121944400546905827679e-4; y -= 0.5 * z ; z = x + y; z += fe * 0.693359375; if (input_x > LOG_UPPER_LIMIT) z = std::numeric_limits<double>::infinity(); if (input_x < LOG_LOWER_LIMIT) z = - std::numeric_limits<double>::infinity(); // std::cout << input_x << " " << std::log(input_x) << " " << z << std::endl; return( z ); }
void vdt::fast_log_vect_46 | ( | double const * | input, |
double * | output, | ||
const unsigned int | arr_size | ||
) |
Some tweaks to make it vectorise with gcc46.
Definition at line 150 of file VDTMath.cc.
References getMantExponent(), i, infinity, collect_tpl::input, LOG_LOWER_LIMIT, LOG_UPPER_LIMIT, PX1log, PX2log, PX3log, PX4log, PX5log, PX6log, QX1log, QX2log, QX3log, QX4log, QX5log, SQRTH, x, detailsBasic3DVector::y, and z.
{ double* input = new double [arr_size]; double* x_arr = new double [arr_size]; int* fe_arr = new int [arr_size]; double y, z; double px,qx; double x; int fe; // Profitability threshold = 4 for (unsigned int i = 0; i < arr_size; ++i) { input[i] = original_input[i]; x= input[i]; /* separate mantissa from exponent */ // double input_x=x; /* separate mantissa from exponent */ double fe; x = getMantExponent(x,fe); // blending if( x < SQRTH ) { fe-=1; x += x ; } x -= 1.0; x_arr[i]=x; fe_arr[i]=fe; } // Profitability threshold = 7 for (unsigned int i = 0; i < arr_size; ++i) { x = x_arr[i]; fe = fe_arr[i]; /* rational form */ z = x*x; px = PX1log; px *= x; px += PX2log; px *= x; px += PX3log; px *= x; px += PX4log; px *= x; px += PX5log; px *= x; px += PX6log; // //for the final formula px *= x; px *= z; qx = x; qx += QX1log; qx *=x; qx += QX2log; qx *=x; qx += QX3log; qx *=x; qx += QX4log; qx *=x; qx += QX5log; y = px / qx ; y -= fe * 2.121944400546905827679e-4; y -= 0.5 * z ; z = x + y; z += fe * 0.693359375; output[i]= z; } for (unsigned int i = 0; i < arr_size; ++i) { if (original_input[i] > LOG_UPPER_LIMIT) output[i] = std::numeric_limits<double>::infinity(); if (original_input[i] < LOG_LOWER_LIMIT) output[i] = - std::numeric_limits<double>::infinity(); } delete [] input; delete [] x_arr; delete [] fe_arr; }
VDT_FORCE_INLINE double vdt::fast_sin | ( | double | x | ) |
Sin defined between -2pi and 2pi.
Definition at line 345 of file VDTMath.h.
References C1cos, C1sin, C2cos, C2sin, C3cos, C3sin, C4cos, C4sin, C5cos, C5sin, C6cos, C6sin, DP1sc, DP2sc, DP3sc, j, PI, PIO2, PIO4, TWOPI, x, detailsBasic3DVector::y, and z.
{ int sign = 1; if (x < 0){ x = - x; sign = -1; } if( x > PI ){ x = TWOPI - x; sign = - sign; } if( x > PIO2 ) x = PI - x ; double y = int( x/PIO4 ); // integer part of x/PIO4 int j=0; if (x>PIO4){ j=2; y+=1; } /* Extended precision modular arithmetic */ double z = ((x - y * DP1sc) - y * DP2sc) - y * DP3sc; double zz = z * z; double px=0; if( j==2 ){ px = C1cos; px *= zz; px += C2cos; px *= zz; px += C3cos; px *= zz; px += C4cos; px *= zz; px += C5cos; px *= zz; px += C6cos; y = 1.0 - zz * .5 + zz * zz * px; } else{ px = C1sin; px *= zz; px += C2sin; px *= zz; px += C3sin; px *= zz; px += C4sin; px *= zz; px += C5sin; px *= zz; px += C6sin; y = z + z * zz * px; } y *= sign; return y; }
VDT_FORCE_INLINE double vdt::fast_tan | ( | double | x | ) |
Sin defined between -2pi and 2pi.
Definition at line 579 of file VDTMath.h.
References abs, DP1tan, DP2tan, DP3tan, alignCSCRings::e, PI, PIO2, PIO4, PX1tan, PX2tan, PX3tan, QX1tan, QX2tan, QX3tan, QX4tan, x, detailsBasic3DVector::y, and z.
{ /* DP * Some of the ifs had to be skipped and replaced by calculations. This allowed * the vectorisation but introduced a loss of performance. * A solution should be found */ // make argument positive but save the sign // without ifs double abs_x =std::abs(x); int sign = x/abs_x; x = abs_x; // remove this if // if (x > PI) // x = x - PI; // like this: int nPI = x /PI; x = x - nPI * PI; // reflect and flip with if // if (x > PIO2){ // x = PI - x ; // sign = - sign; // } // and without int nPIO2 = x/PIO2; int factor = ( 1 - 2* nPIO2); x = nPIO2* PI + factor * x; sign *= factor; /* compute x mod PIO4 */ int nPIO4 = x/PIO4; double y = 2 * nPIO4; /* integer and fractional part modulo one octant */ // This if can be removed and the expression becomes // if (x > PIO4){ // y=2.0; // } // like this: // y = y + nPIO4; double z = ((x - y * DP1tan) - y * DP2tan) - y * DP3tan; double zz = z * z; y=z; if( zz > 1.0e-14 ){ double px = PX1tan; px *= zz; px += PX2tan; px *= zz; px += PX3tan; double qx=zz; qx += QX1tan; qx *=zz; qx += QX2tan; qx *=zz; qx += QX3tan; qx *=zz; qx += QX4tan; y = z + z * zz * px / qx; } // here if we are in the second octant we have // y = -1 /y // else we have y! // again a trick not to use ifs... y -= nPIO4 * ( y + 1.0 / y); y *= sign; return y ; }
VDT_FORCE_INLINE double vdt::getMantExponent | ( | double | x, |
double & | fe | ||
) |
Like frexp but vectorising and the exponent is a double.
Definition at line 196 of file VDTMath.h.
References d2ll(), alignCSCRings::e, asciidump::le, ll2d(), n, and x.
Referenced by fast_log(), and fast_log_vect_46().
{ unsigned long long n = d2ll(x); // shift to the right up to the beginning of the exponent // then with a mask, cut off the sign bit unsigned long long le = ((n >> 52) & 0x7ffLL); // chop the head of the number: an int contains more than 11 bits (32) int e = le; // This is important since sums on ull do not vectorise fe = (e-1023) +1 ; // the plus one to make the result identical to frexp // 13 times f means 52 1. Masking with this means putting to 0 exponent // and sign of a double, leaving the Mantissa, the first 52 bits of a double. n &=0xfffffffffffffLL; // build a mask which is 0.5, i.e. an exponent equal to 1022 // which means *2, see the above +1. const unsigned long long p05 = d2ll(0.5); n |= p05; x = ll2d(n); return x; }
VDT_FORCE_INLINE double vdt::ll2d | ( | unsigned long long | x | ) |
Converts an unsigned long long to a double.
Definition at line 179 of file VDTMath.h.
References vdt::ieee754::d, vdt::ieee754::ll, tmp, and x.
Referenced by fast_isqrt_general(), and getMantExponent().
void vdt::print_instructions_info | ( | ) |
Print the instructions used on screen.
Check and print which instructions sets are enabled.
Definition at line 24 of file VDTMath.cc.
References gather_cfg::cout.
{ std::cout << "\nList of enabled instructions' sets:\n"; #ifdef __SSE2__ std::cout << " - SSE2 instructions enabled" << std::endl; #else std::cout << " - SSE2 instructions *not* enabled" << std::endl; #endif #ifdef __SSE3__ std::cout << " - SSE3 instructions enabled" << std::endl; #else std::cout << " - SSE3 instructions *not* enabled" << std::endl; #endif #ifdef __SSE4_1__ std::cout << " - SSE4.1 instructions enabled" << std::endl; #else std::cout << " - SSE4.1 instructions *not* enabled" << std::endl; #endif #ifdef __AVX__ std::cout << " - AVX instructions enabled" << std::endl; #else std::cout << " - AVX instructions *not* enabled" << std::endl; #endif std::cout << "\n\n"; }
VDT_FORCE_INLINE double vdt::std_inv | ( | double | x | ) |
Definition at line 795 of file VDTMath.h.
References x.
Referenced by std_inv_vect().
{return 1./x;}
VDT_FORCE_INLINE void vdt::std_inv_vect | ( | double const *VDT_RESTRICT | input, |
double *VDT_RESTRICT | output, | ||
const unsigned int | arr_size | ||
) |
VDT_FORCE_INLINE double vdt::std_isqrt | ( | double | x | ) |
Definition at line 772 of file VDTMath.h.
References mathSSE::sqrt().
Referenced by std_isqrt_vect().
VDT_FORCE_INLINE void vdt::std_isqrt_vect | ( | double const *VDT_RESTRICT | input, |
double *VDT_RESTRICT | output, | ||
const unsigned int | arr_size | ||
) |
constexpr double vdt::ATAN_LIMIT = 1e307 |
constexpr double vdt::C1cos = -1.13585365213876817300E-11 |
Definition at line 91 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C1sin = 1.58962301576546568060E-10 |
Definition at line 83 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C2cos = 2.08757008419747316778E-9 |
Definition at line 92 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C2sin = -2.50507477628578072866E-8 |
Definition at line 84 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C3cos = -2.75573141792967388112E-7 |
Definition at line 93 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C3sin = 2.75573136213857245213E-6 |
Definition at line 85 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C4cos = 2.48015872888517045348E-5 |
Definition at line 94 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C4sin = -1.98412698295895385996E-4 |
Definition at line 86 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C5cos = -1.38888888888730564116E-3 |
Definition at line 95 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C5sin = 8.33333333332211858878E-3 |
Definition at line 87 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C6cos = 4.16666666666665929218E-2 |
Definition at line 96 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::C6sin = -1.66666666666666307295E-1 |
Definition at line 88 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::DP1sc = 7.85398125648498535156E-1 |
Definition at line 72 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::DP1tan = 7.853981554508209228515625E-1 |
Definition at line 134 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::DP2sc = 3.77489470793079817668E-8 |
Definition at line 73 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::DP2tan = 7.94662735614792836714E-9 |
Definition at line 135 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::DP3sc = 2.69515142907905952645E-15 |
Definition at line 74 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().
constexpr double vdt::DP3tan = 3.06161699786838294307E-17 |
Definition at line 136 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::EXP_LIMIT = 708. |
Definition at line 46 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::LOG2E = 1.4426950408889634073599 |
Definition at line 35 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::LOG_LOWER_LIMIT = 1e-307 |
Definition at line 57 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::LOG_UPPER_LIMIT = 1e307 |
Definition at line 56 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::MOREBITS = 6.123233995736765886130E-17 |
Definition at line 141 of file VDTMath.h.
Referenced by fast_asin(), and fast_atan().
constexpr double vdt::MOREBITSO2 = MOREBITS/2. |
Definition at line 142 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::PI = M_PI |
Definition at line 76 of file VDTMath.h.
Referenced by fast_cos(), fast_sin(), and fast_tan().
constexpr double vdt::PIO2 = M_PI_2 |
Definition at line 77 of file VDTMath.h.
Referenced by fast_atan(), fast_cos(), fast_sin(), and fast_tan().
constexpr double vdt::PIO4 = M_PI_4 |
Definition at line 78 of file VDTMath.h.
Referenced by fast_asin(), fast_atan(), fast_cos(), fast_sin(), and fast_tan().
constexpr double vdt::PX1asin = 4.253011369004428248960E-3 |
Definition at line 111 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::PX1atan = -8.750608600031904122785E-1 |
Definition at line 144 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::PX1exp = 1.26177193074810590878E-4 |
Definition at line 47 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::PX1log = 1.01875663804580931796E-4 |
Definition at line 58 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::PX1tan = -1.30936939181383777646E4 |
Definition at line 125 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::PX2asin = -6.019598008014123785661E-1 |
Definition at line 112 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::PX2atan = -1.615753718733365076637E1 |
Definition at line 145 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::PX2exp = 3.02994407707441961300E-2 |
Definition at line 48 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::PX2log = 4.97494994976747001425E-1 |
Definition at line 59 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::PX2tan = 1.15351664838587416140E6 |
Definition at line 126 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::PX3asin = 5.444622390564711410273E0 |
Definition at line 113 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::PX3atan = -7.500855792314704667340E1 |
Definition at line 146 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::PX3exp = 9.99999999999999999910E-1 |
Definition at line 49 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::PX3log = 4.70579119878881725854E0 |
Definition at line 60 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::PX3tan = -1.79565251976484877988E7 |
Definition at line 127 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::PX4asin = -1.626247967210700244449E1 |
Definition at line 114 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::PX4atan = -1.228866684490136173410E2 |
Definition at line 147 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::PX4log = 1.44989225341610930846E1 |
Definition at line 61 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::PX5asin = 1.956261983317594739197E1 |
Definition at line 115 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::PX5atan = -6.485021904942025371773E1 |
Definition at line 148 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::PX5log = 1.79368678507819816313E1 |
Definition at line 62 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::PX6asin = -8.198089802484824371615E0 |
Definition at line 116 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::PX6log = 7.70838733755885391666E0 |
Definition at line 63 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::QX1asin = -1.474091372988853791896E1 |
Definition at line 118 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::QX1atan = - 2.485846490142306297962E1 |
Definition at line 150 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::QX1exp = 3.00198505138664455042E-6 |
Definition at line 50 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::QX1log = 1.12873587189167450590E1 |
Definition at line 65 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::QX1tan = 1.36812963470692954678E4 |
Definition at line 129 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::QX2asin = 7.049610280856842141659E1 |
Definition at line 119 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::QX2atan = 1.650270098316988542046E2 |
Definition at line 151 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::QX2exp = 2.52448340349684104192E-3 |
Definition at line 51 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::QX2log = 4.52279145837532221105E1 |
Definition at line 66 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::QX2tan = -1.32089234440210967447E6 |
Definition at line 130 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::QX3asin = -1.471791292232726029859E2 |
Definition at line 120 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::QX3atan = 4.328810604912902668951E2 |
Definition at line 152 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::QX3exp = 2.27265548208155028766E-1 |
Definition at line 52 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::QX3log = 8.29875266912776603211E1 |
Definition at line 67 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::QX3tan = 2.50083801823357915839E7 |
Definition at line 131 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::QX4asin = 1.395105614657485689735E2 |
Definition at line 121 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::QX4atan = 4.853903996359136964868E2 |
Definition at line 153 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::QX4exp = 2.00000000000000000009E0 |
Definition at line 53 of file VDTMath.h.
Referenced by fast_exp(), and fast_exp_vect_46().
constexpr double vdt::QX4log = 7.11544750618563894466E1 |
Definition at line 68 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::QX4tan = -5.38695755929454629881E7 |
Definition at line 132 of file VDTMath.h.
Referenced by fast_tan().
constexpr double vdt::QX5asin = -4.918853881490881290097E1 |
Definition at line 122 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::QX5atan = 1.945506571482613964425E2 |
Definition at line 154 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::QX5log = 2.31251620126765340583E1 |
Definition at line 69 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::RX1asin = 2.967721961301243206100E-3 |
Definition at line 100 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::RX2asin = -5.634242780008963776856E-1 |
Definition at line 101 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::RX3asin = 6.968710824104713396794E0 |
Definition at line 102 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::RX4asin = -2.556901049652824852289E1 |
Definition at line 103 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::RX5asin = 2.853665548261061424989E1 |
Definition at line 104 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::SIN_LOWER_LIMIT = -SIN_UPPER_LIMIT |
constexpr double vdt::SIN_UPPER_LIMIT = TWOPI |
constexpr double vdt::SQRT_LIMIT = 1e307 |
constexpr double vdt::SQRTH = 0.70710678118654752440 |
Definition at line 36 of file VDTMath.h.
Referenced by fast_log(), and fast_log_vect_46().
constexpr double vdt::SX1asin = -2.194779531642920639778E1 |
Definition at line 106 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::SX2asin = 1.470656354026814941758E2 |
Definition at line 107 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::SX3asin = -3.838770957603691357202E2 |
Definition at line 108 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::SX4asin = 3.424398657913078477438E2 |
Definition at line 109 of file VDTMath.h.
Referenced by fast_asin().
constexpr double vdt::T3PO8 = 2.41421356237309504880 |
Definition at line 140 of file VDTMath.h.
Referenced by fast_atan().
constexpr double vdt::TAN_LIMIT = TWOPI |
constexpr double vdt::TWOPI = 2.*M_PI |
Definition at line 75 of file VDTMath.h.
Referenced by fast_cos(), and fast_sin().