CMS 3D CMS Logo

approx_log.h
Go to the documentation of this file.
1 #ifndef DataFormatsMathAPPROX_LOG_H
2 #define DataFormatsMathAPPROX_LOG_H
3 /* Quick and dirty, branchless, log implementations
4  Author: Florent de Dinechin, Aric, ENS-Lyon
5  All right reserved
6 
7 Warning + disclaimers:
8 - no special case handling (infinite/NaN inputs, even zero input, etc)
9 - no input subnormal handling, you'll get completely wrong results.
10  This is the worst problem IMHO (leading to very rare but very bad bugs)
11  However it is probable you can guarantee that your input numbers
12  are never subnormal, check that. Otherwise I'll fix it...
13 - output accuracy reported is only absolute.
14  Relative accuracy may be arbitrary bad around log(1),
15  especially for approx_log0. approx_logf is more or less OK.
16 - The larger/smaller the input x (i.e. away from 1), the better the accuracy.
17 - For the higher degree polynomials it is possible to win a few cycles
18  by parallelizing the evaluation of the polynomial (Estrin).
19  It doesn't make much sense if you want to make a vector function.
20 - All this code is FMA-safe (and accelerated by FMA)
21 
22 Feel free to distribute or insert in other programs etc, as long as this notice is attached.
23  Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
24 
25 Polynomials were obtained using Sollya scripts (in comments):
26 please also keep these comments attached to the code of approx_logf.
27 */
28 
30 
31 
32 template<int DEGREE>
33 constexpr float approx_logf_P(float p);
34 
35 
36 // the following is Sollya output
37 
38 // degree = 2 => absolute accuracy is 7 bits
39 template<>
41  return y * ( float(0x1.0671c4p0) + y * ( float(-0x7.27744p-4) )) ;
42 }
43 
44 // degree = 3 => absolute accuracy is 10 bits
45 template<>
47  return y * (float(0x1.013354p0) + y * (-float(0x8.33006p-4) + y * float(0x4.0d16cp-4))) ;
48 }
49 
50 // degree = 4 => absolute accuracy is 13 bits
51 template<>
53  return y * (float(0xf.ff5bap-4) + y * (-float(0x8.13e5ep-4) + y * (float(0x5.826ep-4) + y * (-float(0x2.e87fb8p-4))))) ;
54 }
55 
56 // degree = 5 => absolute accuracy is 16 bits
57 template<>
59  return y * (float(0xf.ff652p-4) + y * (-float(0x8.0048ap-4) + y * (float(0x5.72782p-4) + y * (-float(0x4.20904p-4) + y * float(0x2.1d7fd8p-4))))) ;
60 }
61 
62 // degree = 6 => absolute accuracy is 19 bits
63 template<>
65  return y * (float(0xf.fff14p-4) + y * (-float(0x7.ff4bfp-4) + y * (float(0x5.582f6p-4) + y * (-float(0x4.1dcf2p-4) + y * (float(0x3.3863f8p-4) + y * (-float(0x1.9288d4p-4))))))) ;
66 }
67 
68 // degree = 7 => absolute accuracy is 21 bits
69 template<>
71  return y * (float(0x1.000034p0) + y * (-float(0x7.ffe57p-4) + y * (float(0x5.5422ep-4) + y * (-float(0x4.037a6p-4) + y * (float(0x3.541c88p-4) + y * (-float(0x2.af842p-4) + y * float(0x1.48b3d8p-4))))))) ;
72 }
73 
74 // degree = 8 => absolute accuracy is 24 bits
75 template<>
77  return y * ( float(0x1.00000cp0) + y * (float(-0x8.0003p-4) + y * (float(0x5.55087p-4) + y * ( float(-0x3.fedcep-4) + y * (float(0x3.3a1dap-4) + y * (float(-0x2.cb55fp-4) + y * (float(0x2.38831p-4) + y * (float(-0xf.e87cap-8) )))))))) ;
78 }
79 
80 
81 
82 template<int DEGREE>
84  using namespace approx_math;
85 
86  binary32 xx,m;
87  xx.f = x;
88 
89  // as many integer computations as possible, most are 1-cycle only, and lots of ILP.
90  int e= (((xx.i32) >> 23) & 0xFF) -127; // extract exponent
91  m.i32 = (xx.i32 & 0x007FFFFF) | 0x3F800000; // extract mantissa as an FP number
92 
93  int adjust = (xx.i32>>22)&1; // first bit of the mantissa, tells us if 1.m > 1.5
94  m.i32 -= adjust << 23; // if so, divide 1.m by 2 (exact operation, no rounding)
95  e += adjust; // and update exponent so we still have x=2^E*y
96 
97  // now back to floating-point
98  float y = m.f -1.0f; // Sterbenz-exact; cancels but we don't care about output relative error
99  // all the computations so far were free of rounding errors...
100 
101  // the following is based on Sollya output
102  float p = approx_logf_P<DEGREE>(y);
103 
104 
105  constexpr float Log2=0xb.17218p-4; // 0.693147182464599609375
106  return float(e)*Log2+p;
107 
108 }
109 
110 #ifndef NO_APPROX_MATH
111 template<int DEGREE>
112 constexpr float unsafe_logf(float x) {
113  return unsafe_logf_impl<DEGREE>(x);
114 }
115 
116 template<int DEGREE>
117 constexpr float approx_logf(float x) {
118  using namespace approx_math;
119 
120 
121  constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
122 
123  //x = std::max(std::min(x,MAXNUMF),0.f);
124  float res = unsafe_logf<DEGREE>(x);
126  return (x>0) ? res :std::numeric_limits<float>::quiet_NaN();
127 }
128 
129 #else
130 template<int DEGREE>
131 constexpr float unsafe_logf(float x) {
132  return std::log(x);
133 }
134 
135 template<int DEGREE>
136 constexpr float approx_logf(float x) {
137  return std::log(x);
138 }
139 
140 
141 #endif // NO_APPROX_MATH
142 
143 #endif
constexpr float approx_logf_P< 2 >(float y)
Definition: approx_log.h:40
constexpr float approx_logf_P< 7 >(float y)
Definition: approx_log.h:70
constexpr float unsafe_logf(float x)
Definition: approx_log.h:112
constexpr float approx_logf_P< 4 >(float y)
Definition: approx_log.h:52
#define constexpr
Definition: Electron.h:6
constexpr float approx_logf_P< 5 >(float y)
Definition: approx_log.h:58
constexpr float approx_logf_P(float p)
constexpr float unsafe_logf_impl(float x)
Definition: approx_log.h:83
const double infinity
constexpr float approx_logf_P< 8 >(float y)
Definition: approx_log.h:76
constexpr float approx_logf_P< 6 >(float y)
Definition: approx_log.h:64
constexpr float approx_logf_P< 3 >(float y)
Definition: approx_log.h:46
constexpr float approx_logf(float x)
Definition: approx_log.h:117