CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
approx_log.h
Go to the documentation of this file.
1 #ifndef APPROX_LOG_H
2 #define APPROX_LOG_H
3 /* Quick and dirty, branchless, log implementations
4  Author: Florent de Dinechin, Aric, ENS-Lyon
5  All right reserved
6 
7 Warning + disclaimers:
8 - no special case handling (infinite/NaN inputs, even zero input, etc)
9 - no input subnormal handling, you'll get completely wrong results.
10  This is the worst problem IMHO (leading to very rare but very bad bugs)
11  However it is probable you can guarantee that your input numbers
12  are never subnormal, check that. Otherwise I'll fix it...
13 - output accuracy reported is only absolute.
14  Relative accuracy may be arbitrary bad around log(1),
15  especially for approx_log0. approx_logf is more or less OK.
16 - The larger/smaller the input x (i.e. away from 1), the better the accuracy.
17 - For the higher degree polynomials it is possible to win a few cycles
18  by parallelizing the evaluation of the polynomial (Estrin).
19  It doesn't make much sense if you want to make a vector function.
20 - All this code is FMA-safe (and accelerated by FMA)
21 
22 Feel free to distribute or insert in other programs etc, as long as this notice is attached.
23  Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
24 
25 Polynomials were obtained using Sollya scripts (in comments):
26 please also keep these comments attached to the code of approx_logf.
27 */
28 
29 #include <cstdint>
30 #include <cmath>
31 #include <limits>
32 #include <algorithm>
33 
34 
35 #ifndef APPROX_MATH_N
36 #define APPROX_MATH_N
37 namespace approx_math {
38  union binary32 {
39  binary32() : ui32(0) {};
40  binary32(float ff) : f(ff) {};
41  binary32(int32_t ii) : i32(ii){}
42  binary32(uint32_t ui) : ui32(ui){}
43 
44  uint32_t ui32; /* unsigned int */
45  int32_t i32; /* Signed int */
46  float f;
47  };
48 }
49 #endif
50 
51 
52 template<int DEGREE>
53 inline float approx_logf_P(float p);
54 
55 
56 // the following is Sollya output
57 
58 // degree = 2 => absolute accuracy is 7 bits
59 template<>
60 inline float approx_logf_P<2>(float y) {
61  return y * ( float(0x1.0671c4p0) + y * ( float(-0x7.27744p-4) )) ;
62 }
63 
64 // degree = 3 => absolute accuracy is 10 bits
65 template<>
66 inline float approx_logf_P<3>(float y) {
67  return y * (float(0x1.013354p0) + y * (-float(0x8.33006p-4) + y * float(0x4.0d16cp-4))) ;
68 }
69 
70 // degree = 4 => absolute accuracy is 13 bits
71 template<>
72 inline float approx_logf_P<4>(float y) {
73  return y * (float(0xf.ff5bap-4) + y * (-float(0x8.13e5ep-4) + y * (float(0x5.826ep-4) + y * (-float(0x2.e87fb8p-4))))) ;
74 }
75 
76 // degree = 5 => absolute accuracy is 16 bits
77 template<>
78 inline float approx_logf_P<5>(float y) {
79  return y * (float(0xf.ff652p-4) + y * (-float(0x8.0048ap-4) + y * (float(0x5.72782p-4) + y * (-float(0x4.20904p-4) + y * float(0x2.1d7fd8p-4))))) ;
80 }
81 
82 // degree = 6 => absolute accuracy is 19 bits
83 template<>
84 inline float approx_logf_P<6>(float y) {
85  return y * (float(0xf.fff14p-4) + y * (-float(0x7.ff4bfp-4) + y * (float(0x5.582f6p-4) + y * (-float(0x4.1dcf2p-4) + y * (float(0x3.3863f8p-4) + y * (-float(0x1.9288d4p-4))))))) ;
86 }
87 
88 // degree = 7 => absolute accuracy is 21 bits
89 template<>
90 inline float approx_logf_P<7>(float y) {
91  return y * (float(0x1.000034p0) + y * (-float(0x7.ffe57p-4) + y * (float(0x5.5422ep-4) + y * (-float(0x4.037a6p-4) + y * (float(0x3.541c88p-4) + y * (-float(0x2.af842p-4) + y * float(0x1.48b3d8p-4))))))) ;
92 }
93 
94 // degree = 8 => absolute accuracy is 24 bits
95 template<>
96 inline float approx_logf_P<8>(float y) {
97  return y * ( float(0x1.00000cp0) + y * (float(-0x8.0003p-4) + y * (float(0x5.55087p-4) + y * ( float(-0x3.fedcep-4) + y * (float(0x3.3a1dap-4) + y * (float(-0x2.cb55fp-4) + y * (float(0x2.38831p-4) + y * (float(-0xf.e87cap-8) )))))))) ;
98 }
99 
100 
101 
102 template<int DEGREE>
103 inline float unsafe_logf_impl(float x) {
104  using namespace approx_math;
105 
106  binary32 xx,m;
107  xx.f = x;
108 
109  // as many integer computations as possible, most are 1-cycle only, and lots of ILP.
110  int e= (((xx.i32) >> 23) & 0xFF) -127; // extract exponent
111  m.i32 = (xx.i32 & 0x007FFFFF) | 0x3F800000; // extract mantissa as an FP number
112 
113  int adjust = (xx.i32>>22)&1; // first bit of the mantissa, tells us if 1.m > 1.5
114  m.i32 -= adjust << 23; // if so, divide 1.m by 2 (exact operation, no rounding)
115  e += adjust; // and update exponent so we still have x=2^E*y
116 
117  // now back to floating-point
118  float y = m.f -1.0f; // Sterbenz-exact; cancels but we don't care about output relative error
119  // all the computations so far were free of rounding errors...
120 
121  // the following is based on Sollya output
122  float p = approx_logf_P<DEGREE>(y);
123 
124 
125  constexpr float Log2=0xb.17218p-4; // 0.693147182464599609375
126  return float(e)*Log2+p;
127 
128 }
129 
130 #ifndef NO_APPROX_MATH
131 template<int DEGREE>
132 inline float unsafe_logf(float x) {
133  return unsafe_logf_impl<DEGREE>(x);
134 }
135 
136 template<int DEGREE>
137 inline float approx_logf(float x) {
138  using namespace approx_math;
139 
140 
141  constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
142 
143  //x = std::max(std::min(x,MAXNUMF),0.f);
144  float res = unsafe_logf<DEGREE>(x);
146  return (x>0) ? res :std::numeric_limits<float>::quiet_NaN();
147 }
148 
149 #else
150 template<int DEGREE>
151 inline float unsafe_logf(float x) {
152  return std::log(x);
153 }
154 
155 template<int DEGREE>
156 inline float approx_logf(float x) {
157  return std::log(x);
158 }
159 
160 
161 #endif // NO_APPROX_MATH
162 
163 #endif
float approx_logf_P< 8 >(float y)
Definition: approx_log.h:96
float approx_logf_P< 4 >(float y)
Definition: approx_log.h:72
binary32(int32_t ii)
Definition: approx_log.h:41
int ii
Definition: cuy.py:588
float approx_logf_P< 3 >(float y)
Definition: approx_log.h:66
float approx_logf(float x)
Definition: approx_log.h:137
const double infinity
binary32(uint32_t ui)
Definition: approx_log.h:42
float approx_logf_P< 6 >(float y)
Definition: approx_log.h:84
float unsafe_logf_impl(float x)
Definition: approx_log.h:103
float approx_logf_P(float p)
float approx_logf_P< 2 >(float y)
Definition: approx_log.h:60
float unsafe_logf(float x)
Definition: approx_log.h:132
float approx_logf_P< 7 >(float y)
Definition: approx_log.h:90
float approx_logf_P< 5 >(float y)
Definition: approx_log.h:78
Definition: DDAxes.h:10
#define constexpr