Go to the documentation of this file.00001 #ifndef APPROX_EXP_H
00002 #define APPROX_EXP_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include <cstdint>
00038 #include <cmath>
00039 #include <limits>
00040 #include <algorithm>
00041
00042 #ifndef APPROX_MATH_N
00043 #define APPROX_MATH_N
00044 namespace approx_math {
00045 union binary32 {
00046 binary32() : ui32(0) {};
00047 binary32(float ff) : f(ff) {};
00048 binary32(int32_t ii) : i32(ii){}
00049 binary32(uint32_t ui) : ui32(ui){}
00050
00051 uint32_t ui32;
00052 int32_t i32;
00053 float f;
00054 };
00055
00056 #ifdef __SSE4_1__
00057 inline float fpfloor(float x) {
00058 return std::floor(x);
00059 }
00060 #else
00061 inline float fpfloor(float x) {
00062 int32_t ret = x;
00063 binary32 xx(x);
00064 ret-=(xx.ui32>>31);
00065 return ret;
00066 }
00067 #endif
00068
00069 }
00070 #endif
00071
00072
00073 template<int DEGREE>
00074 inline float approx_expf_P(float p);
00075
00076
00077 template<>
00078 inline float approx_expf_P<2>(float y) {
00079 return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0)) ;
00080 }
00081
00082 template<>
00083 inline float approx_expf_P<3>(float y) {
00084 #ifdef HORNER // HORNER
00085 return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4))) ;
00086 #else // ESTRIN
00087 float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4)) ;
00088 float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
00089 return p01 + y*y*p23;
00090 #endif
00091 }
00092
00093 template<>
00094 inline float approx_expf_P<4>(float y) {
00095 return float(0x2.p0) + y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4)))) ;
00096 }
00097
00098 template<>
00099 inline float approx_expf_P<5>(float y) {
00100 return float(0x2.p0) + y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) + y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8))))) ;
00101 }
00102
00103 template<>
00104 inline float approx_expf_P<6>(float y) {
00105 #ifdef HORNER // HORNER
00106 float p = float(0x2.p0) + y * (float(0x2.p0) + y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) + y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12)))))) ;
00107 #else // ESTRIN does seem to save a cycle or two
00108 float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
00109 float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
00110 float y2 = y*y;
00111 float p12 = float(0x2.p0) + y;
00112 float p36 = p34 + y2*p56;
00113 float p16 = p12 + y2*p36;
00114 float p = float(0x2.p0) + y*p16;
00115 #endif
00116 return p;
00117 }
00118
00119
00120 template<>
00121 inline float approx_expf_P<7>(float y) {
00122 return float(0x2.p0) + y * (float(0x2.p0) + y * (float(0x1.p0) + y * (float(0x5.55555p-4) + y * (float(0x1.5554e4p-4) + y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12))))))) ;
00123 }
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148 template<int DEGREE>
00149 inline float unsafe_expf_impl(float x) {
00150 using namespace approx_math;
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161 constexpr float inv_log2f = float(0x1.715476p0);
00162 constexpr float log2H = float(0xb.172p-4);
00163 constexpr float log2L = float(0x1.7f7d1cp-20);
00164
00165
00166 float y = x;
00167
00168 float z = fpfloor((x*inv_log2f) +0.5f);
00169
00170 y -= z*log2H;
00171 y -= z*log2L;
00172
00173 int32_t e = z;
00174
00175
00176
00177
00178
00179
00180 e -=1;
00181
00182
00183
00184 float p = approx_expf_P<DEGREE>(y);
00185
00186
00187 binary32 ef;
00188 uint32_t biased_exponent= e+127;
00189 ef.ui32=(biased_exponent<<23);
00190
00191 return p * ef.f;
00192 }
00193
00194
00195 #ifndef NO_APPROX_MATH
00196
00197 template<int DEGREE>
00198 inline float unsafe_expf(float x) {
00199 return unsafe_expf_impl<DEGREE>(x);
00200 }
00201
00202 template<int DEGREE>
00203 inline float approx_expf(float x) {
00204
00205 constexpr float inf_threshold =float(0x5.8b90cp4);
00206
00207 constexpr float zero_threshold_ftz =-float(0x5.75628p4);
00208
00209
00210
00211 x = std::min(std::max(x,zero_threshold_ftz),inf_threshold);
00212 float r = unsafe_expf<DEGREE>(x);
00213
00214 return r;
00215 }
00216
00217
00218 #else
00219 template<int DEGREE>
00220 inline float unsafe_expf(float x) {
00221 return std::exp(x);
00222 }
00223 template<int DEGREE>
00224 inline float approx_expf(float x) {
00225 return std::exp(x);
00226 }
00227 #endif // NO_APPROX_MATH
00228
00229
00230
00231 #endif