CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_6_1_1/src/DataFormats/Math/interface/approx_log.h

Go to the documentation of this file.
00001 #ifndef APPROX_LOG_H
00002 #define APPROX_LOG_H
00003 /*  Quick and dirty, branchless, log implementations
00004     Author: Florent de Dinechin, Aric, ENS-Lyon 
00005     All right reserved
00006 
00007 Warning + disclaimers:
00008 - no special case handling (infinite/NaN inputs, even zero input, etc)
00009 - no input subnormal handling, you'll get completely wrong results.
00010   This is the worst problem IMHO (leading to very rare but very bad bugs)
00011   However it is probable you can guarantee that your input numbers 
00012   are never subnormal, check that. Otherwise I'll fix it...
00013 - output accuracy reported is only absolute. 
00014   Relative accuracy may be arbitrary bad around log(1), 
00015   especially for approx_log0. approx_logf is more or less OK.
00016 - The larger/smaller the input x (i.e. away from 1), the better the accuracy.
00017 - For the higher degree polynomials it is possible to win a few cycles 
00018   by parallelizing the evaluation of the polynomial (Estrin). 
00019   It doesn't make much sense if you want to make a vector function. 
00020 - All this code is FMA-safe (and accelerated by FMA)
00021  
00022 Feel free to distribute or insert in other programs etc, as long as this notice is attached.
00023     Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
00024 
00025 Polynomials were obtained using Sollya scripts (in comments): 
00026 please also keep these comments attached to the code of approx_logf. 
00027 */
00028 
00029 #include <cstdint>
00030 #include <cmath>
00031 #include <limits>
00032 #include <algorithm>
00033 
00034 
00035 #ifndef APPROX_MATH_N
00036 #define APPROX_MATH_N
00037 namespace approx_math {
00038   union binary32 {
00039     binary32() : ui32(0) {};
00040     binary32(float ff) : f(ff) {};
00041     binary32(int32_t ii) : i32(ii){}
00042     binary32(uint32_t ui) : ui32(ui){}
00043     
00044     uint32_t ui32; /* unsigned int */                
00045     int32_t i32; /* Signed int */                
00046     float f;
00047   };
00048 }
00049 #endif
00050 
00051 
00052 template<int DEGREE>
00053 inline float approx_logf_P(float p);
00054 
00055 
00056 // the following is Sollya output
00057 
00058 // degree =  2   => absolute accuracy is  7 bits
00059 template<>
00060 inline float approx_logf_P<2>(float y) {
00061   return  y * ( float(0x1.0671c4p0) + y * ( float(-0x7.27744p-4) )) ;
00062 }
00063 
00064 // degree =  3   => absolute accuracy is  10 bits
00065 template<>
00066 inline float approx_logf_P<3>(float y) {
00067   return  y * (float(0x1.013354p0) + y * (-float(0x8.33006p-4) + y * float(0x4.0d16cp-4))) ;
00068 }                                 
00069 
00070 // degree =  4   => absolute accuracy is  13 bits
00071 template<>
00072 inline float approx_logf_P<4>(float y) {
00073   return  y * (float(0xf.ff5bap-4) + y * (-float(0x8.13e5ep-4) + y * (float(0x5.826ep-4) + y * (-float(0x2.e87fb8p-4))))) ;
00074 }
00075 
00076 // degree =  5   => absolute accuracy is  16 bits
00077 template<>
00078 inline float approx_logf_P<5>(float y) {
00079   return  y * (float(0xf.ff652p-4) + y * (-float(0x8.0048ap-4) + y * (float(0x5.72782p-4) + y * (-float(0x4.20904p-4) + y * float(0x2.1d7fd8p-4))))) ;
00080 }
00081 
00082 // degree =  6   => absolute accuracy is  19 bits
00083 template<>
00084 inline float approx_logf_P<6>(float y) {
00085   return  y * (float(0xf.fff14p-4) + y * (-float(0x7.ff4bfp-4) + y * (float(0x5.582f6p-4) + y * (-float(0x4.1dcf2p-4) + y * (float(0x3.3863f8p-4) + y * (-float(0x1.9288d4p-4))))))) ;
00086 }
00087 
00088 // degree =  7   => absolute accuracy is  21 bits
00089 template<>
00090 inline float approx_logf_P<7>(float y) {
00091   return  y * (float(0x1.000034p0) + y * (-float(0x7.ffe57p-4) + y * (float(0x5.5422ep-4) + y * (-float(0x4.037a6p-4) + y * (float(0x3.541c88p-4) + y * (-float(0x2.af842p-4) + y * float(0x1.48b3d8p-4))))))) ;
00092 }
00093 
00094 // degree =  8   => absolute accuracy is  24 bits
00095 template<>
00096 inline float approx_logf_P<8>(float y) {
00097    return  y * ( float(0x1.00000cp0) + y * (float(-0x8.0003p-4) + y * (float(0x5.55087p-4) + y * ( float(-0x3.fedcep-4) + y * (float(0x3.3a1dap-4) + y * (float(-0x2.cb55fp-4) + y * (float(0x2.38831p-4) + y * (float(-0xf.e87cap-8) )))))))) ;
00098 }
00099 
00100 
00101 
00102 template<int DEGREE>
00103 inline float unsafe_logf_impl(float x) {
00104   using namespace approx_math;
00105 
00106   binary32 xx,m;
00107   xx.f = x;
00108   
00109   // as many integer computations as possible, most are 1-cycle only, and lots of ILP.
00110   int e= (((xx.i32) >> 23) & 0xFF) -127; // extract exponent
00111   m.i32 = (xx.i32 & 0x007FFFFF) | 0x3F800000; // extract mantissa as an FP number
00112   
00113   int adjust = (xx.i32>>22)&1; // first bit of the mantissa, tells us if 1.m > 1.5
00114   m.i32 -= adjust << 23; // if so, divide 1.m by 2 (exact operation, no rounding)
00115   e += adjust;           // and update exponent so we still have x=2^E*y
00116   
00117   // now back to floating-point
00118   float y = m.f -1.0f; // Sterbenz-exact; cancels but we don't care about output relative error
00119   // all the computations so far were free of rounding errors...
00120 
00121   // the following is based on Sollya output
00122   float p = approx_logf_P<DEGREE>(y);
00123   
00124 
00125   constexpr float Log2=0xb.17218p-4; // 0.693147182464599609375 
00126   return float(e)*Log2+p;
00127 
00128 }
00129 
00130 #ifndef NO_APPROX_MATH
00131 template<int DEGREE>
00132 inline float unsafe_logf(float x) {
00133   return unsafe_logf_impl<DEGREE>(x);
00134 }
00135 
00136 template<int DEGREE>
00137 inline float approx_logf(float x) {
00138   using namespace approx_math;
00139 
00140 
00141   constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
00142 
00143   //x = std::max(std::min(x,MAXNUMF),0.f);
00144   float  res = unsafe_logf<DEGREE>(x);
00145   res =  (x<MAXNUMF) ? res : std::numeric_limits<float>::infinity();
00146   return (x>0) ? res :std::numeric_limits<float>::quiet_NaN();
00147 }
00148 
00149 #else
00150 template<int DEGREE>
00151 inline float unsafe_logf(float x) {
00152   return std::log(x);
00153 }
00154 
00155 template<int DEGREE>
00156 inline float approx_logf(float x) {
00157   return std::log(x);
00158 }
00159 
00160 
00161 #endif // NO_APPROX_MATH
00162 
00163 #endif