CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_14/src/DataFormats/Math/src/VDTMath.cc

Go to the documentation of this file.
00001 /*******************************************************************************
00002  *
00003  * VDT math library: collection of double precision vectorisable trascendental
00004  * functions.
00005  * The c++11 standard is used: remember to enable it for the compilation.
00006  *
00007  * The basic idea is to exploit pade polinomials.
00008  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
00009  * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos, 
00010  * tan, asin, acos and atan functions. The Cephes library can be found here:
00011  * http://www.netlib.org/cephes/
00012  * 
00013  ******************************************************************************/
00014 
00015 #include "DataFormats/Math/interface/VDTMath.h"
00016 //#include "VDTMath.h"
00017 #include <stdlib.h>
00018 #include <iostream>
00019 #include <cassert>
00020 #include <iomanip>
00021 
00022 //------------------------------------------------------------------------------
00023 
00024 void vdt::print_instructions_info(){
00029   std::cout << "\nList of enabled instructions' sets:\n";
00030   
00031 #ifdef __SSE2__
00032         std::cout << " - SSE2 instructions enabled" << std::endl;
00033 #else
00034         std::cout << " - SSE2 instructions *not* enabled" << std::endl;
00035 #endif
00036 #ifdef __SSE3__
00037         std::cout << " - SSE3 instructions enabled" << std::endl;
00038 #else
00039         std::cout << " - SSE3 instructions *not* enabled" << std::endl;
00040 #endif
00041 
00042 #ifdef __SSE4_1__
00043         std::cout << " - SSE4.1 instructions enabled" << std::endl;
00044 #else
00045         std::cout << " - SSE4.1 instructions *not* enabled" << std::endl;
00046 #endif
00047 #ifdef __AVX__
00048         std::cout << " - AVX instructions enabled" << std::endl;
00049 #else
00050         std::cout << " - AVX instructions *not* enabled" << std::endl;
00051 #endif
00052        std::cout << "\n\n";
00053     }
00054 
00055 
00056 /*******************************************************************************
00057  * 
00058  * EXP IMPLEMENTATIONS
00059  * 
00060  ******************************************************************************/ 
00061 
00063 void vdt::fast_exp_vect_46(double const * input,
00064                            double * output,
00065                            const unsigned int arr_size){
00066 
00067   // input & output must not point to the same memory location
00068   //    assert( input != output );
00069 
00070   int n;
00071   int* nv = new int[arr_size];
00072   double xx, px, qx;
00073   ieee754 u;
00074 
00075   // for vectorisation
00076   double x;
00077 
00078   for (unsigned int i = 0; i < arr_size; ++i)
00079      nv[i] = n = std::floor( LOG2E * input[i] + 0.5 );
00080 
00081 
00082   //Profitability threshold = 7
00083   for (unsigned int i = 0; i < arr_size; ++i) {
00084     x = input[i];
00085 
00086     //nv[i] = n = int(LOG2E * x + 0.5);//std::floor( LOG2E * x + 0.5 );
00087     n=nv[i];
00088 
00089     px = n;
00090 
00091     x -= px * 6.93145751953125E-1;
00092     x -= px * 1.42860682030941723212E-6;
00093 
00094     xx = x * x;
00095 
00096     // px = x * P(x**2).
00097     px = PX1exp;
00098     px *= xx;
00099     px += PX2exp;
00100     px *= xx;
00101     px += PX3exp;
00102     px *= x;
00103 
00104     // Evaluate Q(x**2).
00105     qx = QX1exp;
00106     qx *= xx;
00107     qx += QX2exp;
00108     qx *= xx;
00109     qx += QX3exp;
00110     qx *= xx;
00111     qx += QX4exp;
00112 
00113     // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
00114     x = px / (qx - px);
00115     x = 1.0 + 2.0 * x;
00116 
00117     // partial
00118     output[i]=x;
00119                   
00120     } // end loop on input vals
00121    
00122   //Profitability threshold = 4
00123   for (unsigned int i = 0; i < arr_size; ++i) {
00124       //     Build 2^n in double.
00125       n=nv[i];
00126       u.d = 0;
00127       n += 1023;    
00128       u.ll = (long long) (n) << 52;
00129       output[i] = output[i] * u.d; 
00130 
00131       if (input[i] > EXP_LIMIT)
00132               output[i] = std::numeric_limits<double>::infinity();
00133       if (input[i] < -EXP_LIMIT)
00134               output[i] = 0.;     
00135       }
00136       
00137    delete [] nv;
00138 }
00139 
00140 //------------------------------------------------------------------------------
00141 
00142 /*******************************************************************************
00143  * 
00144  * LOG IMPLEMENTATIONS
00145  * 
00146  ******************************************************************************/ 
00147  
00148 //------------------------------------------------------------------------------  
00149 // The implementation is such in order to vectorise also with gcc461
00150 void vdt::fast_log_vect_46(double const * original_input, 
00151                            double* output, 
00152                            const unsigned int arr_size){
00153 
00154   double* input = new double [arr_size];
00155   double* x_arr = new double [arr_size];
00156   int* fe_arr = new int [arr_size];
00157   
00158   double y, z;
00159   double px,qx;
00160   double x;
00161   int fe;
00162   // Profitability threshold = 4
00163   for (unsigned int i = 0; i < arr_size; ++i) {
00164     input[i] = original_input[i];
00165     x= input[i];
00166 
00167 
00168     /* separate mantissa from exponent
00169     */
00170     
00171 //    double input_x=x;
00172 
00173     /* separate mantissa from exponent */    
00174     double fe;
00175     x = getMantExponent(x,fe);
00176 
00177     // blending      
00178     if( x < SQRTH ) {
00179       fe-=1;
00180       x +=  x ;
00181       }
00182     x -= 1.0;
00183     
00184     x_arr[i]=x;
00185     fe_arr[i]=fe;
00186     
00187     }
00188   // Profitability threshold = 7
00189   for (unsigned int i = 0; i < arr_size; ++i) {
00190     
00191     x = x_arr[i];
00192     fe = fe_arr[i];
00193     
00194     /* rational form */
00195 
00196     z = x*x;
00197     px =  PX1log;
00198     px *= x;    
00199     px += PX2log;
00200     px *= x;    
00201     px += PX3log;
00202     px *= x; 
00203     px += PX4log;
00204     px *= x; 
00205     px += PX5log;
00206     px *= x;
00207     px += PX6log;
00208     //
00209     //for the final formula
00210     px *= x; 
00211     px *= z;
00212 
00213 
00214     qx = x;
00215     qx += QX1log;
00216     qx *=x;
00217     qx += QX2log;
00218     qx *=x;    
00219     qx += QX3log;
00220     qx *=x;    
00221     qx += QX4log;
00222     qx *=x;    
00223     qx += QX5log;
00224 
00225     
00226     y = px / qx ;
00227    
00228     y -= fe * 2.121944400546905827679e-4; 
00229     y -= 0.5 * z  ;
00230     
00231     z = x + y;
00232     z += fe * 0.693359375;
00233     output[i]= z;
00234   }
00235     
00236   for (unsigned int i = 0; i < arr_size; ++i) {    
00237     if (original_input[i] > LOG_UPPER_LIMIT)
00238       output[i] = std::numeric_limits<double>::infinity();
00239     if (original_input[i] < LOG_LOWER_LIMIT)
00240       output[i] =  - std::numeric_limits<double>::infinity();                             
00241     }
00242   
00243   delete [] input;
00244   delete [] x_arr;
00245   delete [] fe_arr;
00246   
00247 }
00248 
00249 
00250 //------------------------------------------------------------------------------
00251