CMS 3D CMS Logo

Classes | Functions | Variables

vdt Namespace Reference

Classes

union  ieee754
 Used to switch between different type of interpretations of the data (64 bits) More...

Functions

VDT_FORCE_INLINE unsigned long long d2ll (double x)
 Converts a double to an unsigned long long.
VDT_FORCE_INLINE double fast_acos (double x)
VDT_FORCE_INLINE double fast_approx_inv (double x)
VDT_FORCE_INLINE double fast_approx_isqrt (double x)
VDT_FORCE_INLINE double fast_asin (double x)
VDT_FORCE_INLINE double fast_atan (double x)
VDT_FORCE_INLINE double fast_cos (double x)
 Cos defined between -2pi and 2pi.
VDT_FORCE_INLINE double fast_exp (double x)
 Exponential Function.
void fast_exp_vect_46 (double const *input, double *output, const unsigned int arr_size)
 Some tweaks to make it vectorise with gcc46.
VDT_FORCE_INLINE double fast_inv (double x)
VDT_FORCE_INLINE double fast_isqrt (double x)
VDT_FORCE_INLINE double fast_isqrt_general (double x, const unsigned short ISQRT_ITERATIONS)
VDT_FORCE_INLINE double fast_log (double x)
void fast_log_vect_46 (double const *input, double *output, const unsigned int arr_size)
 Some tweaks to make it vectorise with gcc46.
VDT_FORCE_INLINE double fast_sin (double x)
 Sin defined between -2pi and 2pi.
VDT_FORCE_INLINE double fast_tan (double x)
 Sin defined between -2pi and 2pi.
VDT_FORCE_INLINE double getMantExponent (double x, double &fe)
 Like frexp but vectorising and the exponent is a double.
VDT_FORCE_INLINE double ll2d (unsigned long long x)
 Converts an unsigned long long to a double.
void print_instructions_info ()
 Print the instructions used on screen.
VDT_FORCE_INLINE double std_inv (double x)
VDT_FORCE_INLINE void std_inv_vect (double const *VDT_RESTRICT input, double *VDT_RESTRICT output, const unsigned int arr_size) CMS_VECTORIZE_VERBOSE
VDT_FORCE_INLINE double std_isqrt (double x)
VDT_FORCE_INLINE void std_isqrt_vect (double const *VDT_RESTRICT input, double *VDT_RESTRICT output, const unsigned int arr_size) CMS_VECTORIZE_VERBOSE

Variables

constexpr double ATAN_LIMIT = 1e307
constexpr double C1cos = -1.13585365213876817300E-11
constexpr double C1sin = 1.58962301576546568060E-10
constexpr double C2cos = 2.08757008419747316778E-9
constexpr double C2sin = -2.50507477628578072866E-8
constexpr double C3cos = -2.75573141792967388112E-7
constexpr double C3sin = 2.75573136213857245213E-6
constexpr double C4cos = 2.48015872888517045348E-5
constexpr double C4sin = -1.98412698295895385996E-4
constexpr double C5cos = -1.38888888888730564116E-3
constexpr double C5sin = 8.33333333332211858878E-3
constexpr double C6cos = 4.16666666666665929218E-2
constexpr double C6sin = -1.66666666666666307295E-1
constexpr double DP1sc = 7.85398125648498535156E-1
constexpr double DP1tan = 7.853981554508209228515625E-1
constexpr double DP2sc = 3.77489470793079817668E-8
constexpr double DP2tan = 7.94662735614792836714E-9
constexpr double DP3sc = 2.69515142907905952645E-15
constexpr double DP3tan = 3.06161699786838294307E-17
constexpr double EXP_LIMIT = 708.
constexpr double LOG2E = 1.4426950408889634073599
constexpr double LOG_LOWER_LIMIT = 1e-307
constexpr double LOG_UPPER_LIMIT = 1e307
constexpr double MOREBITS = 6.123233995736765886130E-17
constexpr double MOREBITSO2 = MOREBITS/2.
constexpr double PI = M_PI
constexpr double PIO2 = M_PI_2
constexpr double PIO4 = M_PI_4
constexpr double PX1asin = 4.253011369004428248960E-3
constexpr double PX1atan = -8.750608600031904122785E-1
constexpr double PX1exp = 1.26177193074810590878E-4
constexpr double PX1log = 1.01875663804580931796E-4
constexpr double PX1tan = -1.30936939181383777646E4
constexpr double PX2asin = -6.019598008014123785661E-1
constexpr double PX2atan = -1.615753718733365076637E1
constexpr double PX2exp = 3.02994407707441961300E-2
constexpr double PX2log = 4.97494994976747001425E-1
constexpr double PX2tan = 1.15351664838587416140E6
constexpr double PX3asin = 5.444622390564711410273E0
constexpr double PX3atan = -7.500855792314704667340E1
constexpr double PX3exp = 9.99999999999999999910E-1
constexpr double PX3log = 4.70579119878881725854E0
constexpr double PX3tan = -1.79565251976484877988E7
constexpr double PX4asin = -1.626247967210700244449E1
constexpr double PX4atan = -1.228866684490136173410E2
constexpr double PX4log = 1.44989225341610930846E1
constexpr double PX5asin = 1.956261983317594739197E1
constexpr double PX5atan = -6.485021904942025371773E1
constexpr double PX5log = 1.79368678507819816313E1
constexpr double PX6asin = -8.198089802484824371615E0
constexpr double PX6log = 7.70838733755885391666E0
constexpr double QX1asin = -1.474091372988853791896E1
constexpr double QX1atan = - 2.485846490142306297962E1
constexpr double QX1exp = 3.00198505138664455042E-6
constexpr double QX1log = 1.12873587189167450590E1
constexpr double QX1tan = 1.36812963470692954678E4
constexpr double QX2asin = 7.049610280856842141659E1
constexpr double QX2atan = 1.650270098316988542046E2
constexpr double QX2exp = 2.52448340349684104192E-3
constexpr double QX2log = 4.52279145837532221105E1
constexpr double QX2tan = -1.32089234440210967447E6
constexpr double QX3asin = -1.471791292232726029859E2
constexpr double QX3atan = 4.328810604912902668951E2
constexpr double QX3exp = 2.27265548208155028766E-1
constexpr double QX3log = 8.29875266912776603211E1
constexpr double QX3tan = 2.50083801823357915839E7
constexpr double QX4asin = 1.395105614657485689735E2
constexpr double QX4atan = 4.853903996359136964868E2
constexpr double QX4exp = 2.00000000000000000009E0
constexpr double QX4log = 7.11544750618563894466E1
constexpr double QX4tan = -5.38695755929454629881E7
constexpr double QX5asin = -4.918853881490881290097E1
constexpr double QX5atan = 1.945506571482613964425E2
constexpr double QX5log = 2.31251620126765340583E1
constexpr double RX1asin = 2.967721961301243206100E-3
constexpr double RX2asin = -5.634242780008963776856E-1
constexpr double RX3asin = 6.968710824104713396794E0
constexpr double RX4asin = -2.556901049652824852289E1
constexpr double RX5asin = 2.853665548261061424989E1
constexpr double SIN_LOWER_LIMIT = -SIN_UPPER_LIMIT
constexpr double SIN_UPPER_LIMIT = TWOPI
constexpr double SQRT_LIMIT = 1e307
constexpr double SQRTH = 0.70710678118654752440
constexpr double SX1asin = -2.194779531642920639778E1
constexpr double SX2asin = 1.470656354026814941758E2
constexpr double SX3asin = -3.838770957603691357202E2
constexpr double SX4asin = 3.424398657913078477438E2
constexpr double T3PO8 = 2.41421356237309504880
constexpr double TAN_LIMIT = TWOPI
constexpr double TWOPI = 2.*M_PI

Function Documentation

VDT_FORCE_INLINE unsigned long long vdt::d2ll ( double  x)

Converts a double to an unsigned long long.

Definition at line 188 of file VDTMath.h.

References vdt::ieee754::d, vdt::ieee754::ll, tmp, and x.

Referenced by fast_isqrt_general(), and getMantExponent().

                                                   {
  ieee754 tmp;
  tmp.d=x;
  return tmp.ll;
}
VDT_FORCE_INLINE double vdt::fast_acos ( double  x)

Definition at line 564 of file VDTMath.h.

References fast_asin(), mathSSE::sqrt(), and z.

                                           {
  double z;
 
//   z = PIO4 - fast_asin(x);
//   z += MOREBITS;
//   z += PIO4;

//   if (x > .5 )
    z = 2.0 * fast_asin(  sqrt(0.5 - 0.5 * x ) ) ;
    
  return z;
  }
VDT_FORCE_INLINE double vdt::fast_approx_inv ( double  x)

Definition at line 786 of file VDTMath.h.

References fast_approx_isqrt(), x, and detailsBasic3DVector::y.

                                                   {
    double sign = 1;
    if( x < 0.0 ) {
        x = - x;
        sign = -1;
        }
    double y=fast_approx_isqrt(x); 
    return y*y*sign;}
VDT_FORCE_INLINE double vdt::fast_approx_isqrt ( double  x)

Definition at line 768 of file VDTMath.h.

References fast_isqrt_general().

Referenced by fast_approx_inv().

{return fast_isqrt_general(x,3);}
VDT_FORCE_INLINE double vdt::fast_asin ( double  x)

Definition at line 413 of file VDTMath.h.

References a, alignCSCRings::e, MOREBITS, AlCaHLTBitMon_ParallelJobs::p, PIO4, PX1asin, PX2asin, PX3asin, PX4asin, PX5asin, PX6asin, QX1asin, QX2asin, QX3asin, QX4asin, QX5asin, RX1asin, RX2asin, RX3asin, RX4asin, RX5asin, mathSSE::sqrt(), SX1asin, SX2asin, SX3asin, SX4asin, x, and z.

Referenced by fast_acos().

                                           {

  int sign=1;
  double a = x; //necessary for linear approx

  if ( x < 0. ){
    sign *= -1;
    a *= -1;
    }

  double p, z, zz;
  double px,qx;

  /* arcsin(1-x) = pi/2 - sqrt(2x)(1+R(x))  */
  zz = 1.0 - a;
  
  px = RX1asin;
  px*= zz;
  px+= RX2asin;
  px*= zz;    
  px+= RX3asin;
  px*= zz;    
  px+= RX4asin;
  px*= zz;    
  px+= RX5asin;

  qx = zz;
  qx+= SX1asin;
  qx*= zz;    
  qx+= SX2asin;
  qx*= zz;    
  qx+= SX3asin;
  qx*= zz;    
  qx+= SX4asin;
  
  p =zz* px/qx;
  
//     p = zz * polevl( zz, R, 4)/p1evl( zz, S, 4);
  
  zz = std::sqrt(zz+zz);
  z = PIO4 - zz;
  zz = zz * p - MOREBITS;
  z -= zz;
  z += PIO4;
    
  if( a < 0.625 ){
    zz = a * a;
    px = PX1asin;
    px*= zz;
    px+= PX2asin;
    px*= zz;    
    px+= PX3asin;
    px*= zz;    
    px+= PX4asin;
    px*= zz;    
    px+= PX5asin;
    px*= zz;    
    px+= PX6asin;

    qx = zz;
    qx+= QX1asin;
    qx*= zz;    
    qx+= QX2asin;
    qx*= zz;    
    qx+= QX3asin;
    qx*= zz;    
    qx+= QX4asin;
    qx*= zz;    
    qx+= QX5asin;
    
    z = zz*px/qx;    

    z = a * z + a;
    }

  z *= sign;

   //linear approx, not sooo needed but seable. Price is cheap though
  if( a < 1.0e-8 )
    z = a;
  
  return z;
  }
VDT_FORCE_INLINE double vdt::fast_atan ( double  x)

Definition at line 664 of file VDTMath.h.

References MOREBITS, MOREBITSO2, PIO2, PIO4, PX1atan, PX2atan, PX3atan, PX4atan, PX5atan, QX1atan, QX2atan, QX3atan, QX4atan, QX5atan, T3PO8, x, detailsBasic3DVector::y, and z.

                                           {

    /* make argument positive and save the sign */
    int sign = 1;
    if( x < 0.0 ) {
        x = - x;
        sign = -1;
        }

    /* range reduction */
    double originalx=x;

// This is slower!
//     double y = 0.0;
//     double factor = 0.;
// 
//     if (x  > .66){
//         y = PIO4;
//         factor = MOREBITSO2;
//         x = (x-1.0) / (x+1.0);
//         }
//     if( originalx > T3PO8 ) {
//         y = PIO2;
//         factor = MOREBITS;
//         x = -1.0 / originalx ;
//         }

    double y = PIO4;
    double factor = MOREBITSO2;
    x = (x-1.0) / (x+1.0);

    if( originalx > T3PO8 ) {
        y = PIO2;
        //flag = 1.;
        factor = MOREBITS;
        x = -1.0 / originalx ;
        }
    if ( originalx <= 0.66 ) {
        y = 0.0;
        x = originalx;
        //flag = 0.;
        factor = 0.;
        }

    double z = x * x;

    double px = PX1atan;
    px *= z;
    px += PX2atan;
    px *= z;
    px += PX3atan;
    px *= z;
    px += PX4atan;
    px *= z;
    px += PX5atan;
    px *= z; // for the final formula

    double qx=z;
    qx += QX1atan;
    qx *=z;
    qx += QX2atan;
    qx *=z;
    qx += QX3atan;
    qx *=z;
    qx += QX4atan;
    qx *=z;
    qx += QX5atan;

//     z = px / qx;
//     z = x * px / qx + x;

    y = y +x * px / qx + x +factor;

    y = sign * y;

    return y;
    }
VDT_FORCE_INLINE double vdt::fast_cos ( double  x)

Cos defined between -2pi and 2pi.

Definition at line 501 of file VDTMath.h.

References abs, C1cos, C1sin, C2cos, C2sin, C3cos, C3sin, C4cos, C4sin, C5cos, C5sin, C6cos, C6sin, DP1sc, DP2sc, DP3sc, j, PI, PIO2, PIO4, TWOPI, x, detailsBasic3DVector::y, and z.

                                          {

  x = std::abs(x);

  if( x > PI )
    x = TWOPI - x ;

  int sign = 1;
   if( x > PIO2 ){
     x = PI - x;
     sign=-1;
     }

  double y = int( x/PIO4 ); // integer part of x/PIO4 

  int j=0;
  if (x>PIO4){
    j=2;
    y+=1;
    sign = -sign;
    }

  /* Extended precision modular arithmetic */
  double z = ((x - y * DP1sc) - y * DP2sc) - y * DP3sc;

  double zz = z * z;

  double px=0;
  if( j==2 ){
    px  = C1sin;
    px *= zz;
    px += C2sin;
    px *= zz;
    px += C3sin;
    px *= zz;
    px += C4sin;
    px *= zz;
    px += C5sin;
    px *= zz;
    px += C6sin;
    y = z  +  z * zz * px;
    }
  else{
    px  = C1cos;
    px *= zz;
    px += C2cos;
    px *= zz;
    px += C3cos;
    px *= zz;
    px += C4cos;
    px *= zz;
    px += C5cos;
    px *= zz;
    px += C6cos;  
    y = 1. - zz * .5 + zz * zz * px;  
    }
  
  y *= sign;
  
  return y;
  }
VDT_FORCE_INLINE double vdt::fast_exp ( double  x)

Exponential Function.

Definition at line 226 of file VDTMath.h.

References vdt::ieee754::d, EXP_LIMIT, infinity, vdt::ieee754::ll, LOG2E, n, PX1exp, PX2exp, PX3exp, QX1exp, QX2exp, QX3exp, QX4exp, and x.

                                          {

    double initial_x = x;

//    double px =int(LOG2E * x + 0.5); // std::floor(LOG2E * x + 0.5);
    double px = std::floor(LOG2E * x + 0.5);

    int n = px;

    x -= px * 6.93145751953125E-1;
    x -= px * 1.42860682030941723212E-6;

    double xx = x * x;

    // px = x * P(x**2).
    px = PX1exp;
    px *= xx;
    px += PX2exp;
    px *= xx;
    px += PX3exp;
    px *= x;

    // Evaluate Q(x**2).
    double qx = QX1exp;
    qx *= xx;
    qx += QX2exp;
    qx *= xx;
    qx += QX3exp;
    qx *= xx;
    qx += QX4exp;

    // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
    x = px / (qx - px);
    x = 1.0 + 2.0 * x;

    // Build 2^n in double.
    ieee754 u;
    u.d = 0;
    n += 1023;
    u.ll = (long long) (n) << 52;

    double res = x * u.d;
    if (initial_x > EXP_LIMIT)
            res = std::numeric_limits<double>::infinity();
    if (initial_x < -EXP_LIMIT)
            res = 0.;   
    
    return res;
}
void vdt::fast_exp_vect_46 ( double const *  input,
double *  output,
const unsigned int  arr_size 
)

Some tweaks to make it vectorise with gcc46.

Exponential Function - some tweaks to have it vectorise in gcc46.

Definition at line 63 of file VDTMath.cc.

References vdt::ieee754::d, EXP_LIMIT, i, infinity, vdt::ieee754::ll, LOG2E, n, PX1exp, PX2exp, PX3exp, QX1exp, QX2exp, QX3exp, QX4exp, and x.

                                                       {

  // input & output must not point to the same memory location
  //    assert( input != output );

  int n;
  int* nv = new int[arr_size];
  double xx, px, qx;
  ieee754 u;

  // for vectorisation
  double x;

  for (unsigned int i = 0; i < arr_size; ++i)
     nv[i] = n = std::floor( LOG2E * input[i] + 0.5 );


  //Profitability threshold = 7
  for (unsigned int i = 0; i < arr_size; ++i) {
    x = input[i];

    //nv[i] = n = int(LOG2E * x + 0.5);//std::floor( LOG2E * x + 0.5 );
    n=nv[i];

    px = n;

    x -= px * 6.93145751953125E-1;
    x -= px * 1.42860682030941723212E-6;

    xx = x * x;

    // px = x * P(x**2).
    px = PX1exp;
    px *= xx;
    px += PX2exp;
    px *= xx;
    px += PX3exp;
    px *= x;

    // Evaluate Q(x**2).
    qx = QX1exp;
    qx *= xx;
    qx += QX2exp;
    qx *= xx;
    qx += QX3exp;
    qx *= xx;
    qx += QX4exp;

    // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
    x = px / (qx - px);
    x = 1.0 + 2.0 * x;

    // partial
    output[i]=x;
                  
    } // end loop on input vals
   
  //Profitability threshold = 4
  for (unsigned int i = 0; i < arr_size; ++i) {
      //     Build 2^n in double.
      n=nv[i];
      u.d = 0;
      n += 1023;    
      u.ll = (long long) (n) << 52;
      output[i] = output[i] * u.d; 

      if (input[i] > EXP_LIMIT)
              output[i] = std::numeric_limits<double>::infinity();
      if (input[i] < -EXP_LIMIT)
              output[i] = 0.;     
      }
      
   delete [] nv;
}
VDT_FORCE_INLINE double vdt::fast_inv ( double  x)

Definition at line 776 of file VDTMath.h.

References fast_isqrt(), x, and detailsBasic3DVector::y.

                                            {
    double sign = 1;
    if( x < 0.0 ) {
        x = - x;
        sign = -1;
        }
    double y=fast_isqrt(x); 
    return y*y*sign;
    }
VDT_FORCE_INLINE double vdt::fast_isqrt ( double  x)

Definition at line 765 of file VDTMath.h.

References fast_isqrt_general().

Referenced by fast_inv().

{return fast_isqrt_general(x,4);} 
VDT_FORCE_INLINE double vdt::fast_isqrt_general ( double  x,
const unsigned short  ISQRT_ITERATIONS 
)

Definition at line 746 of file VDTMath.h.

References d2ll(), i, j, ll2d(), x, and detailsBasic3DVector::y.

Referenced by fast_approx_isqrt(), and fast_isqrt().

                                                                                            { 

  double x2 = x * 0.5;
  double y  = x;
  unsigned long long i  = d2ll(y);
  // Evil!
  i  = 0x5fe6eb50c7aa19f9  - ( i >> 1 );
  y  = ll2d(i);
  for (unsigned int j=0;j<ISQRT_ITERATIONS;++j)
      y *= 1.5 - ( x2 * y * y ) ;

  return y;
}
VDT_FORCE_INLINE double vdt::fast_log ( double  x)

Definition at line 279 of file VDTMath.h.

References getMantExponent(), infinity, LOG_LOWER_LIMIT, LOG_UPPER_LIMIT, PX1log, PX2log, PX3log, PX4log, PX5log, PX6log, QX1log, QX2log, QX3log, QX4log, QX5log, SQRTH, x, detailsBasic3DVector::y, and z.

                                          {

    double input_x=x;

    /* separate mantissa from exponent */    
    double fe;
    x = getMantExponent(x,fe);

    // blending      
    if( x < SQRTH ) {
      fe-=1;
      x +=  x ;
      }
    x -= 1.0;

    /* rational form */

    double z = x*x;
    double px =  PX1log;
    px *= x;    
    px += PX2log;
    px *= x;    
    px += PX3log;
    px *= x; 
    px += PX4log;
    px *= x; 
    px += PX5log;
    px *= x;
    px += PX6log;
    //
    //for the final formula
    px *= x; 
    px *= z;

    double qx = x;
    qx += QX1log;
    qx *=x;
    qx += QX2log;
    qx *=x;    
    qx += QX3log;
    qx *=x;    
    qx += QX4log;
    qx *=x;    
    qx += QX5log;

    double y = px / qx ;

    y -= fe * 2.121944400546905827679e-4; 
    y -= 0.5 * z  ;

    z = x + y;
    z += fe * 0.693359375;

    if (input_x > LOG_UPPER_LIMIT)
      z = std::numeric_limits<double>::infinity();
    if (input_x < LOG_LOWER_LIMIT)
      z =  - std::numeric_limits<double>::infinity();       

//     std::cout << input_x << " " << std::log(input_x) << " " << z << std::endl;

    return( z );  
  
  }
void vdt::fast_log_vect_46 ( double const *  input,
double *  output,
const unsigned int  arr_size 
)

Some tweaks to make it vectorise with gcc46.

Definition at line 150 of file VDTMath.cc.

References getMantExponent(), i, infinity, collect_tpl::input, LOG_LOWER_LIMIT, LOG_UPPER_LIMIT, PX1log, PX2log, PX3log, PX4log, PX5log, PX6log, QX1log, QX2log, QX3log, QX4log, QX5log, SQRTH, x, detailsBasic3DVector::y, and z.

                                                       {

  double* input = new double [arr_size];
  double* x_arr = new double [arr_size];
  int* fe_arr = new int [arr_size];
  
  double y, z;
  double px,qx;
  double x;
  int fe;
  // Profitability threshold = 4
  for (unsigned int i = 0; i < arr_size; ++i) {
    input[i] = original_input[i];
    x= input[i];


    /* separate mantissa from exponent
    */
    
//    double input_x=x;

    /* separate mantissa from exponent */    
    double fe;
    x = getMantExponent(x,fe);

    // blending      
    if( x < SQRTH ) {
      fe-=1;
      x +=  x ;
      }
    x -= 1.0;
    
    x_arr[i]=x;
    fe_arr[i]=fe;
    
    }
  // Profitability threshold = 7
  for (unsigned int i = 0; i < arr_size; ++i) {
    
    x = x_arr[i];
    fe = fe_arr[i];
    
    /* rational form */

    z = x*x;
    px =  PX1log;
    px *= x;    
    px += PX2log;
    px *= x;    
    px += PX3log;
    px *= x; 
    px += PX4log;
    px *= x; 
    px += PX5log;
    px *= x;
    px += PX6log;
    //
    //for the final formula
    px *= x; 
    px *= z;


    qx = x;
    qx += QX1log;
    qx *=x;
    qx += QX2log;
    qx *=x;    
    qx += QX3log;
    qx *=x;    
    qx += QX4log;
    qx *=x;    
    qx += QX5log;

    
    y = px / qx ;
   
    y -= fe * 2.121944400546905827679e-4; 
    y -= 0.5 * z  ;
    
    z = x + y;
    z += fe * 0.693359375;
    output[i]= z;
  }
    
  for (unsigned int i = 0; i < arr_size; ++i) {    
    if (original_input[i] > LOG_UPPER_LIMIT)
      output[i] = std::numeric_limits<double>::infinity();
    if (original_input[i] < LOG_LOWER_LIMIT)
      output[i] =  - std::numeric_limits<double>::infinity();                             
    }
  
  delete [] input;
  delete [] x_arr;
  delete [] fe_arr;
  
}
VDT_FORCE_INLINE double vdt::fast_sin ( double  x)

Sin defined between -2pi and 2pi.

Definition at line 345 of file VDTMath.h.

References C1cos, C1sin, C2cos, C2sin, C3cos, C3sin, C4cos, C4sin, C5cos, C5sin, C6cos, C6sin, DP1sc, DP2sc, DP3sc, j, PI, PIO2, PIO4, TWOPI, x, detailsBasic3DVector::y, and z.

                                          {

  int sign = 1;

  if (x < 0){
    x = - x;
    sign = -1;
    }

  if( x > PI ){
    x = TWOPI - x;
    sign = - sign;
    }

  if( x > PIO2 )
    x = PI - x ;

  double y = int( x/PIO4 ); // integer part of x/PIO4 

  int j=0;
  if (x>PIO4){
     j=2;
     y+=1;
     }

  /* Extended precision modular arithmetic */
  double z = ((x - y * DP1sc) - y * DP2sc) - y * DP3sc;

  double zz = z * z;

  double px=0;

  if( j==2 ){
    px  = C1cos;
    px *= zz;
    px += C2cos;
    px *= zz;
    px += C3cos;
    px *= zz;
    px += C4cos;
    px *= zz;
    px += C5cos;
    px *= zz;
    px += C6cos;
    y = 1.0 - zz * .5 + zz * zz * px;
    }
  else{
    px  = C1sin;
    px *= zz;
    px += C2sin;
    px *= zz;
    px += C3sin;
    px *= zz;
    px += C4sin;
    px *= zz;
    px += C5sin;
    px *= zz;
    px += C6sin;
    y = z  +  z * zz * px;
    }

  y *= sign;

  return y;
  }
VDT_FORCE_INLINE double vdt::fast_tan ( double  x)

Sin defined between -2pi and 2pi.

Definition at line 579 of file VDTMath.h.

References abs, DP1tan, DP2tan, DP3tan, alignCSCRings::e, PI, PIO2, PIO4, PX1tan, PX2tan, PX3tan, QX1tan, QX2tan, QX3tan, QX4tan, x, detailsBasic3DVector::y, and z.

                                            {
/* DP
 * Some of the ifs had to be skipped and replaced by calculations. This allowed 
 * the vectorisation but introduced a loss of performance. 
 * A solution should be found
*/  
  
   // make argument positive but save the sign
   // without ifs
   double abs_x =std::abs(x);
   int sign = x/abs_x;
   x = abs_x;

// remove this if
//     if (x > PI)
//        x = x - PI;
// like this:
   int nPI  = x /PI;
   x = x - nPI * PI;


//     reflect and flip with if
//     if (x > PIO2){
//        x = PI - x ;
//        sign = - sign;
//       }
// and without
    int nPIO2 = x/PIO2;
    int factor = ( 1 - 2* nPIO2);
    x = nPIO2* PI + factor * x;
    sign *= factor;


    /* compute x mod PIO4 */
    int nPIO4 = x/PIO4;
    double y = 2 * nPIO4;

    /* integer and fractional part modulo one octant */

//     This if can be removed and the expression becomes
//     if (x > PIO4){
//        y=2.0;
//       }
//   like this:
//     y = y  + nPIO4;


    double z = ((x - y * DP1tan) - y * DP2tan) - y * DP3tan;

    double zz = z * z;

    y=z;

    if( zz > 1.0e-14 ){
        double px = PX1tan;
        px *= zz;
        px += PX2tan;
        px *= zz;
        px += PX3tan;

        double qx=zz;
        qx += QX1tan;
        qx *=zz;
        qx += QX2tan;
        qx *=zz;
        qx += QX3tan;
        qx *=zz;
        qx += QX4tan;

        y = z + z * zz * px / qx;
    }

    // here if we are in the second octant we have
    //  y = -1 /y
   // else we have y!
   // again a trick not to use ifs...
    y -= nPIO4 * (  y  + 1.0 / y);

    y *= sign;

    return y ;
    }
VDT_FORCE_INLINE double vdt::getMantExponent ( double  x,
double &  fe 
)

Like frexp but vectorising and the exponent is a double.

Definition at line 196 of file VDTMath.h.

References d2ll(), alignCSCRings::e, asciidump::le, ll2d(), n, and x.

Referenced by fast_log(), and fast_log_vect_46().

                                                             {
  
  unsigned long long n = d2ll(x);
  
  // shift to the right up to the beginning of the exponent 
  // then with a mask, cut off the sign bit
  unsigned long long le = ((n >> 52) & 0x7ffLL);
  
  // chop the head of the number: an int contains more than 11 bits (32)
  int e = le; // This is important since sums on ull do not vectorise
  fe = (e-1023) +1 ; // the plus one to make the result identical to frexp
  
  // 13 times f means 52 1. Masking with this means putting to 0 exponent
  // and sign of a double, leaving the Mantissa, the first 52 bits of a double.
  n &=0xfffffffffffffLL;
  
  // build a mask which is 0.5, i.e. an exponent equal to 1022
  // which means *2, see the above +1.
  const unsigned long long p05 = d2ll(0.5);
  n |= p05;
  x = ll2d(n);
  return x;
}
VDT_FORCE_INLINE double vdt::ll2d ( unsigned long long  x)

Converts an unsigned long long to a double.

Definition at line 179 of file VDTMath.h.

References vdt::ieee754::d, vdt::ieee754::ll, tmp, and x.

Referenced by fast_isqrt_general(), and getMantExponent().

                                                   {
  ieee754 tmp;
  tmp.ll=x;
  return tmp.d;
}
void vdt::print_instructions_info ( )

Print the instructions used on screen.

Check and print which instructions sets are enabled.

Definition at line 24 of file VDTMath.cc.

References gather_cfg::cout.

                                 {
  std::cout << "\nList of enabled instructions' sets:\n";
  
#ifdef __SSE2__
        std::cout << " - SSE2 instructions enabled" << std::endl;
#else
        std::cout << " - SSE2 instructions *not* enabled" << std::endl;
#endif
#ifdef __SSE3__
        std::cout << " - SSE3 instructions enabled" << std::endl;
#else
        std::cout << " - SSE3 instructions *not* enabled" << std::endl;
#endif

#ifdef __SSE4_1__
        std::cout << " - SSE4.1 instructions enabled" << std::endl;
#else
        std::cout << " - SSE4.1 instructions *not* enabled" << std::endl;
#endif
#ifdef __AVX__
        std::cout << " - AVX instructions enabled" << std::endl;
#else
        std::cout << " - AVX instructions *not* enabled" << std::endl;
#endif
       std::cout << "\n\n";
    }
VDT_FORCE_INLINE double vdt::std_inv ( double  x)

Definition at line 795 of file VDTMath.h.

References x.

Referenced by std_inv_vect().

{return 1./x;}
VDT_FORCE_INLINE void vdt::std_inv_vect ( double const *VDT_RESTRICT  input,
double *VDT_RESTRICT  output,
const unsigned int  arr_size 
)

Definition at line 880 of file VDTMath.h.

References i, and std_inv().

                                                                      {
  //Profitability threshold = 6
  for (unsigned int i=0;i<arr_size;++i)
    output[i] = vdt::std_inv(input[i]);
  }
VDT_FORCE_INLINE double vdt::std_isqrt ( double  x)

Definition at line 772 of file VDTMath.h.

References mathSSE::sqrt().

Referenced by std_isqrt_vect().

{return 1./std::sqrt(x);}
VDT_FORCE_INLINE void vdt::std_isqrt_vect ( double const *VDT_RESTRICT  input,
double *VDT_RESTRICT  output,
const unsigned int  arr_size 
)

Definition at line 872 of file VDTMath.h.

References i, and std_isqrt().

                                                                      {
  //Profitability threshold = 6
  for (unsigned int i=0;i<arr_size;++i)
    output[i] = vdt::std_isqrt(input[i]);
  }

Variable Documentation

constexpr double vdt::ATAN_LIMIT = 1e307

Definition at line 156 of file VDTMath.h.

constexpr double vdt::C1cos = -1.13585365213876817300E-11

Definition at line 91 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C1sin = 1.58962301576546568060E-10

Definition at line 83 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C2cos = 2.08757008419747316778E-9

Definition at line 92 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C2sin = -2.50507477628578072866E-8

Definition at line 84 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C3cos = -2.75573141792967388112E-7

Definition at line 93 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C3sin = 2.75573136213857245213E-6

Definition at line 85 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C4cos = 2.48015872888517045348E-5

Definition at line 94 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C4sin = -1.98412698295895385996E-4

Definition at line 86 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C5cos = -1.38888888888730564116E-3

Definition at line 95 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C5sin = 8.33333333332211858878E-3

Definition at line 87 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C6cos = 4.16666666666665929218E-2

Definition at line 96 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::C6sin = -1.66666666666666307295E-1

Definition at line 88 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::DP1sc = 7.85398125648498535156E-1

Definition at line 72 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::DP1tan = 7.853981554508209228515625E-1

Definition at line 134 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::DP2sc = 3.77489470793079817668E-8

Definition at line 73 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::DP2tan = 7.94662735614792836714E-9

Definition at line 135 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::DP3sc = 2.69515142907905952645E-15

Definition at line 74 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().

constexpr double vdt::DP3tan = 3.06161699786838294307E-17

Definition at line 136 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::EXP_LIMIT = 708.

Definition at line 46 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::LOG2E = 1.4426950408889634073599

Definition at line 35 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::LOG_LOWER_LIMIT = 1e-307

Definition at line 57 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::LOG_UPPER_LIMIT = 1e307

Definition at line 56 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::MOREBITS = 6.123233995736765886130E-17

Definition at line 141 of file VDTMath.h.

Referenced by fast_asin(), and fast_atan().

constexpr double vdt::MOREBITSO2 = MOREBITS/2.

Definition at line 142 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::PI = M_PI

Definition at line 76 of file VDTMath.h.

Referenced by fast_cos(), fast_sin(), and fast_tan().

constexpr double vdt::PIO2 = M_PI_2

Definition at line 77 of file VDTMath.h.

Referenced by fast_atan(), fast_cos(), fast_sin(), and fast_tan().

constexpr double vdt::PIO4 = M_PI_4

Definition at line 78 of file VDTMath.h.

Referenced by fast_asin(), fast_atan(), fast_cos(), fast_sin(), and fast_tan().

constexpr double vdt::PX1asin = 4.253011369004428248960E-3

Definition at line 111 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::PX1atan = -8.750608600031904122785E-1

Definition at line 144 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::PX1exp = 1.26177193074810590878E-4

Definition at line 47 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::PX1log = 1.01875663804580931796E-4

Definition at line 58 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::PX1tan = -1.30936939181383777646E4

Definition at line 125 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::PX2asin = -6.019598008014123785661E-1

Definition at line 112 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::PX2atan = -1.615753718733365076637E1

Definition at line 145 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::PX2exp = 3.02994407707441961300E-2

Definition at line 48 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::PX2log = 4.97494994976747001425E-1

Definition at line 59 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::PX2tan = 1.15351664838587416140E6

Definition at line 126 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::PX3asin = 5.444622390564711410273E0

Definition at line 113 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::PX3atan = -7.500855792314704667340E1

Definition at line 146 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::PX3exp = 9.99999999999999999910E-1

Definition at line 49 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::PX3log = 4.70579119878881725854E0

Definition at line 60 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::PX3tan = -1.79565251976484877988E7

Definition at line 127 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::PX4asin = -1.626247967210700244449E1

Definition at line 114 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::PX4atan = -1.228866684490136173410E2

Definition at line 147 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::PX4log = 1.44989225341610930846E1

Definition at line 61 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::PX5asin = 1.956261983317594739197E1

Definition at line 115 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::PX5atan = -6.485021904942025371773E1

Definition at line 148 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::PX5log = 1.79368678507819816313E1

Definition at line 62 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::PX6asin = -8.198089802484824371615E0

Definition at line 116 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::PX6log = 7.70838733755885391666E0

Definition at line 63 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::QX1asin = -1.474091372988853791896E1

Definition at line 118 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::QX1atan = - 2.485846490142306297962E1

Definition at line 150 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::QX1exp = 3.00198505138664455042E-6

Definition at line 50 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::QX1log = 1.12873587189167450590E1

Definition at line 65 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::QX1tan = 1.36812963470692954678E4

Definition at line 129 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::QX2asin = 7.049610280856842141659E1

Definition at line 119 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::QX2atan = 1.650270098316988542046E2

Definition at line 151 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::QX2exp = 2.52448340349684104192E-3

Definition at line 51 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::QX2log = 4.52279145837532221105E1

Definition at line 66 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::QX2tan = -1.32089234440210967447E6

Definition at line 130 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::QX3asin = -1.471791292232726029859E2

Definition at line 120 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::QX3atan = 4.328810604912902668951E2

Definition at line 152 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::QX3exp = 2.27265548208155028766E-1

Definition at line 52 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::QX3log = 8.29875266912776603211E1

Definition at line 67 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::QX3tan = 2.50083801823357915839E7

Definition at line 131 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::QX4asin = 1.395105614657485689735E2

Definition at line 121 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::QX4atan = 4.853903996359136964868E2

Definition at line 153 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::QX4exp = 2.00000000000000000009E0

Definition at line 53 of file VDTMath.h.

Referenced by fast_exp(), and fast_exp_vect_46().

constexpr double vdt::QX4log = 7.11544750618563894466E1

Definition at line 68 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::QX4tan = -5.38695755929454629881E7

Definition at line 132 of file VDTMath.h.

Referenced by fast_tan().

constexpr double vdt::QX5asin = -4.918853881490881290097E1

Definition at line 122 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::QX5atan = 1.945506571482613964425E2

Definition at line 154 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::QX5log = 2.31251620126765340583E1

Definition at line 69 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::RX1asin = 2.967721961301243206100E-3

Definition at line 100 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::RX2asin = -5.634242780008963776856E-1

Definition at line 101 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::RX3asin = 6.968710824104713396794E0

Definition at line 102 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::RX4asin = -2.556901049652824852289E1

Definition at line 103 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::RX5asin = 2.853665548261061424989E1

Definition at line 104 of file VDTMath.h.

Referenced by fast_asin().

Definition at line 82 of file VDTMath.h.

constexpr double vdt::SIN_UPPER_LIMIT = TWOPI

Definition at line 81 of file VDTMath.h.

constexpr double vdt::SQRT_LIMIT = 1e307

Definition at line 160 of file VDTMath.h.

constexpr double vdt::SQRTH = 0.70710678118654752440

Definition at line 36 of file VDTMath.h.

Referenced by fast_log(), and fast_log_vect_46().

constexpr double vdt::SX1asin = -2.194779531642920639778E1

Definition at line 106 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::SX2asin = 1.470656354026814941758E2

Definition at line 107 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::SX3asin = -3.838770957603691357202E2

Definition at line 108 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::SX4asin = 3.424398657913078477438E2

Definition at line 109 of file VDTMath.h.

Referenced by fast_asin().

constexpr double vdt::T3PO8 = 2.41421356237309504880

Definition at line 140 of file VDTMath.h.

Referenced by fast_atan().

constexpr double vdt::TAN_LIMIT = TWOPI

Definition at line 137 of file VDTMath.h.

constexpr double vdt::TWOPI = 2.*M_PI

Definition at line 75 of file VDTMath.h.

Referenced by fast_cos(), and fast_sin().