CMS 3D CMS Logo

approx_log.h
Go to the documentation of this file.
1 #ifndef DataFormatsMathAPPROX_LOG_H
2 #define DataFormatsMathAPPROX_LOG_H
3 /* Quick and dirty, branchless, log implementations
4  Author: Florent de Dinechin, Aric, ENS-Lyon
5  All right reserved
6 
7 Warning + disclaimers:
8 - no special case handling (infinite/NaN inputs, even zero input, etc)
9 - no input subnormal handling, you'll get completely wrong results.
10  This is the worst problem IMHO (leading to very rare but very bad bugs)
11  However it is probable you can guarantee that your input numbers
12  are never subnormal, check that. Otherwise I'll fix it...
13 - output accuracy reported is only absolute.
14  Relative accuracy may be arbitrary bad around log(1),
15  especially for approx_log0. approx_logf is more or less OK.
16 - The larger/smaller the input x (i.e. away from 1), the better the accuracy.
17 - For the higher degree polynomials it is possible to win a few cycles
18  by parallelizing the evaluation of the polynomial (Estrin).
19  It doesn't make much sense if you want to make a vector function.
20 - All this code is FMA-safe (and accelerated by FMA)
21 
22 Feel free to distribute or insert in other programs etc, as long as this notice is attached.
23  Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
24 
25 Polynomials were obtained using Sollya scripts (in comments):
26 please also keep these comments attached to the code of approx_logf.
27 */
28 
30 
31 template <int DEGREE>
32 constexpr float approx_logf_P(float p);
33 
34 // the following is Sollya output
35 
36 // degree = 2 => absolute accuracy is 7 bits
37 template <>
38 constexpr float approx_logf_P<2>(float y) {
39  return y * (float(0x1.0671c4p0) + y * (float(-0x7.27744p-4)));
40 }
41 
42 // degree = 3 => absolute accuracy is 10 bits
43 template <>
44 constexpr float approx_logf_P<3>(float y) {
45  return y * (float(0x1.013354p0) + y * (-float(0x8.33006p-4) + y * float(0x4.0d16cp-4)));
46 }
47 
48 // degree = 4 => absolute accuracy is 13 bits
49 template <>
50 constexpr float approx_logf_P<4>(float y) {
51  return y *
52  (float(0xf.ff5bap-4) + y * (-float(0x8.13e5ep-4) + y * (float(0x5.826ep-4) + y * (-float(0x2.e87fb8p-4)))));
53 }
54 
55 // degree = 5 => absolute accuracy is 16 bits
56 template <>
57 constexpr float approx_logf_P<5>(float y) {
58  return y * (float(0xf.ff652p-4) +
59  y * (-float(0x8.0048ap-4) +
60  y * (float(0x5.72782p-4) + y * (-float(0x4.20904p-4) + y * float(0x2.1d7fd8p-4)))));
61 }
62 
63 // degree = 6 => absolute accuracy is 19 bits
64 template <>
65 constexpr float approx_logf_P<6>(float y) {
66  return y * (float(0xf.fff14p-4) +
67  y * (-float(0x7.ff4bfp-4) +
68  y * (float(0x5.582f6p-4) +
69  y * (-float(0x4.1dcf2p-4) + y * (float(0x3.3863f8p-4) + y * (-float(0x1.9288d4p-4)))))));
70 }
71 
72 // degree = 7 => absolute accuracy is 21 bits
73 template <>
74 constexpr float approx_logf_P<7>(float y) {
75  return y * (float(0x1.000034p0) +
76  y * (-float(0x7.ffe57p-4) +
77  y * (float(0x5.5422ep-4) +
78  y * (-float(0x4.037a6p-4) +
79  y * (float(0x3.541c88p-4) + y * (-float(0x2.af842p-4) + y * float(0x1.48b3d8p-4)))))));
80 }
81 
82 // degree = 8 => absolute accuracy is 24 bits
83 template <>
84 constexpr float approx_logf_P<8>(float y) {
85  return y *
86  (float(0x1.00000cp0) +
87  y * (float(-0x8.0003p-4) +
88  y * (float(0x5.55087p-4) +
89  y * (float(-0x3.fedcep-4) +
90  y * (float(0x3.3a1dap-4) +
91  y * (float(-0x2.cb55fp-4) + y * (float(0x2.38831p-4) + y * (float(-0xf.e87cap-8)))))))));
92 }
93 
94 template <int DEGREE>
95 constexpr float unsafe_logf_impl(float x) {
96  using namespace approx_math;
97 
98  binary32 xx, m;
99  xx.f = x;
100 
101  // as many integer computations as possible, most are 1-cycle only, and lots of ILP.
102  int e = (((xx.i32) >> 23) & 0xFF) - 127; // extract exponent
103  m.i32 = (xx.i32 & 0x007FFFFF) | 0x3F800000; // extract mantissa as an FP number
104 
105  int adjust = (xx.i32 >> 22) & 1; // first bit of the mantissa, tells us if 1.m > 1.5
106  m.i32 -= adjust << 23; // if so, divide 1.m by 2 (exact operation, no rounding)
107  e += adjust; // and update exponent so we still have x=2^E*y
108 
109  // now back to floating-point
110  float y = m.f - 1.0f; // Sterbenz-exact; cancels but we don't care about output relative error
111  // all the computations so far were free of rounding errors...
112 
113  // the following is based on Sollya output
114  float p = approx_logf_P<DEGREE>(y);
115 
116  constexpr float Log2 = 0xb.17218p-4; // 0.693147182464599609375
117  return float(e) * Log2 + p;
118 }
119 
120 #ifndef NO_APPROX_MATH
121 template <int DEGREE>
122 constexpr float unsafe_logf(float x) {
123  return unsafe_logf_impl<DEGREE>(x);
124 }
125 
126 template <int DEGREE>
127 constexpr float approx_logf(float x) {
128  using namespace approx_math;
129 
130  constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
131 
132  //x = std::max(std::min(x,MAXNUMF),0.f);
133  float res = unsafe_logf<DEGREE>(x);
135  return (x > 0) ? res : std::numeric_limits<float>::quiet_NaN();
136 }
137 
138 #else
139 template <int DEGREE>
140 constexpr float unsafe_logf(float x) {
141  return std::log(x);
142 }
143 
144 template <int DEGREE>
145 constexpr float approx_logf(float x) {
146  return std::log(x);
147 }
148 
149 #endif // NO_APPROX_MATH
150 
151 #endif
constexpr float approx_logf_P< 2 >(float y)
Definition: approx_log.h:38
constexpr float approx_logf_P< 7 >(float y)
Definition: approx_log.h:74
constexpr float unsafe_logf(float x)
Definition: approx_log.h:122
constexpr float approx_logf_P< 4 >(float y)
Definition: approx_log.h:50
Definition: Electron.h:6
constexpr float approx_logf_P< 5 >(float y)
Definition: approx_log.h:57
constexpr float approx_logf_P(float p)
constexpr float unsafe_logf_impl(float x)
Definition: approx_log.h:95
const double infinity
constexpr float approx_logf_P< 8 >(float y)
Definition: approx_log.h:84
constexpr float approx_logf_P< 6 >(float y)
Definition: approx_log.h:65
constexpr float approx_logf_P< 3 >(float y)
Definition: approx_log.h:44
constexpr float approx_logf(float x)
Definition: approx_log.h:127
float x