CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Classes | Namespaces | Macros | Functions
approx_exp.h File Reference
#include <cstdint>
#include <cmath>
#include <limits>
#include <algorithm>

Go to the source code of this file.

Classes

union  approx_math::binary32
 

Namespaces

 approx_math
 

Macros

#define APPROX_MATH_N
 

Functions

template<int DEGREE>
float approx_expf (float x)
 
template<int DEGREE>
float approx_expf_P (float p)
 
template<>
float approx_expf_P< 2 > (float y)
 
template<>
float approx_expf_P< 3 > (float y)
 
template<>
float approx_expf_P< 4 > (float y)
 
template<>
float approx_expf_P< 5 > (float y)
 
template<>
float approx_expf_P< 6 > (float y)
 
template<>
float approx_expf_P< 7 > (float y)
 
float approx_math::fpfloor (float x)
 
template<int DEGREE>
float unsafe_expf (float x)
 
template<int DEGREE>
float unsafe_expf_impl (float x)
 

Macro Definition Documentation

#define APPROX_MATH_N

Definition at line 43 of file approx_exp.h.

Function Documentation

template<int DEGREE>
float approx_expf ( float  x)
inline

Definition at line 203 of file approx_exp.h.

References constexpr, bookConverter::max, min(), alignCSCRings::r, and x.

// Range-limited entry point: clamps x into [zero_threshold_ftz, inf_threshold]
// and then evaluates unsafe_expf<DEGREE> on the clamped value.
203  {
204 
// Upper clamp bound for x.
// NOTE(review): numerically about 88.72, presumably log(2^128) -- confirm.
205  constexpr float inf_threshold =float(0x5.8b90cp4);
206  // log of the smallest normal
207  constexpr float zero_threshold_ftz =-float(0x5.75628p4); // sollya: single(log(1b-126));
208  // flush to zero on the output
209  // manage infty output:
210  // faster than directly on output!
// The limiting is applied to the input x (max with the lower bound first,
// then min with the upper bound); the value returned below is not re-checked.
211  x = std::min(std::max(x,zero_threshold_ftz),inf_threshold);
212  float r = unsafe_expf<DEGREE>(x);
213 
214  return r;
215 }
#define constexpr
T min(T a, T b)
Definition: MathUtil.h:58
Definition: DDAxes.h:10
template<int DEGREE>
float approx_expf_P ( float  p)
inline
template<>
float approx_expf_P< 2 > ( float  y)
inline

Definition at line 78 of file approx_exp.h.

References detailsBasic3DVector::y.

// Degree-2 polynomial in y, evaluated in nested (Horner) form.
// The constant term is 2.0 (0x2.p0); the comments in unsafe_expf_impl state
// that the polynomial is evaluated for 2*exp.
78  {
79  return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0)) ;
80 }
template<>
float approx_expf_P< 3 > ( float  y)
inline

Definition at line 83 of file approx_exp.h.

References detailsBasic3DVector::y.

// Degree-3 polynomial in y with constant term 2.0 (0x2.p0).
// The HORNER macro selects the evaluation scheme; both branches use the same
// four coefficients.
83  {
84 #ifdef HORNER // HORNER
// fully nested evaluation
85  return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4))) ;
86 #else // ESTRIN
// split into two independent linear pieces, then combine with y*y:
// p23 = c2 + y*c3, p01 = c0 + y*c1, result = p01 + y^2 * p23
87  float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4)) ;
88  float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
89  return p01 + y*y*p23;
90 #endif
91 }
template<>
float approx_expf_P< 4 > ( float  y)
inline

Definition at line 94 of file approx_exp.h.

References detailsBasic3DVector::y.

// Degree-4 polynomial in y, evaluated in nested (Horner) form.
// The constant term is 2.0 (0x2.p0).
94  {
95  return float(0x2.p0) + y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4)))) ;
96 }
template<>
float approx_expf_P< 5 > ( float  y)
inline

Definition at line 99 of file approx_exp.h.

References detailsBasic3DVector::y.

// Degree-5 polynomial in y, evaluated in nested (Horner) form.
// The constant term and the linear coefficient are both 2.0 (0x2.p0).
99  {
100  return float(0x2.p0) + y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) + y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8))))) ;
101 }
template<>
float approx_expf_P< 6 > ( float  y)
inline

Definition at line 104 of file approx_exp.h.

References AlCaHLTBitMon_ParallelJobs::p, and detailsBasic3DVector::y.

// Degree-6 polynomial in y with constant term 2.0 (0x2.p0).
// The HORNER macro selects the evaluation scheme; both branches use the same
// coefficients c0..c6 = 2, 2, 1, 0x5.55523p-4, 0x1.5554dcp-4, 0x4.48f41p-8,
// 0xb.6ad4p-12.
104  {
105 #ifdef HORNER // HORNER
// fully nested evaluation
106  float p = float(0x2.p0) + y * (float(0x2.p0) + y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) + y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12)))))) ;
107 #else // ESTRIN does seem to save a cycle or two
// pairwise linear pieces: p56 = c5 + y*c6, p34 = c3 + y*c4
108  float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
109  float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
110  float y2 = y*y;
// p12 stands for c1 + y*c2; since c1 = 2 and c2 = 1 (see the HORNER branch)
// the multiplication by c2 is dropped.
111  float p12 = float(0x2.p0) + y; // By chance we save one operation here! Funny.
// combine the pieces with y^2: p36 covers c3..c6, p16 covers c1..c6
112  float p36 = p34 + y2*p56;
113  float p16 = p12 + y2*p36;
// final step adds the constant term c0
114  float p = float(0x2.p0) + y*p16;
115 #endif
116  return p;
117 }
template<>
float approx_expf_P< 7 > ( float  y)
inline

Definition at line 121 of file approx_exp.h.

References detailsBasic3DVector::y.

// Degree-7 polynomial in y, evaluated in nested (Horner) form.
// The first three coefficients are 2.0, 2.0 and 1.0 (0x2.p0, 0x2.p0, 0x1.p0).
121  {
122  return float(0x2.p0) + y * (float(0x2.p0) + y * (float(0x1.p0) + y * (float(0x5.55555p-4) + y * (float(0x1.5554e4p-4) + y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12))))))) ;
123 }
template<int DEGREE>
float unsafe_expf ( float  x)
inline

Definition at line 198 of file approx_exp.h.

References x.

// Thin forwarder: passes x unchanged to unsafe_expf_impl<DEGREE> and returns
// its result. No clamping of x is done here (approx_expf clamps before
// calling this function).
198  {
199  return unsafe_expf_impl<DEGREE>(x);
200 }
Definition: DDAxes.h:10
template<int DEGREE>
float unsafe_expf_impl ( float  x)
inline

Definition at line 149 of file approx_exp.h.

References constexpr, alignCSCRings::e, f, approx_math::fpfloor(), AlCaHLTBitMon_ParallelJobs::p, x, detailsBasic3DVector::y, and detailsBasic3DVector::z.

// Core evaluation, with no range check on x:
//   z = fpfloor(x*inv_log2f + 0.5f)          (nearest-integer multiple of log 2)
//   y = x - z*log2H - z*log2L                 (reduced argument)
//   p = approx_expf_P<DEGREE>(y)              (polynomial, scaled by 2 per the comments below)
//   result = p * f, where f is the float whose bit pattern is (z - 1 + 127) << 23
149  {
150  using namespace approx_math;
151  /* Sollya for the following constants:
152  display=hexadecimal;
153  1b23+1b22;
154  single(1/log(2));
155  log2H=round(log(2), 16, RN);
156  log2L = single(log(2)-log2H);
157  log2H; log2L;
158 
159  */
160  // constexpr float rnd_cst = float(0xc.p20);
// 1/log(2), and log(2) split into a high part (log2H, rounded to 16 bits per
// the Sollya script above) and a low correction part (log2L).
161  constexpr float inv_log2f = float(0x1.715476p0);
162  constexpr float log2H = float(0xb.172p-4);
163  constexpr float log2L = float(0x1.7f7d1cp-20);
164 
165 
166  float y = x;
167  // This is doing round(x*inv_log2f) to the nearest integer
168  float z = fpfloor((x*inv_log2f) +0.5f);
169  // Cody-and-Waite accurate range reduction. FMA-safe.
// subtract the high part first, then the low part
170  y -= z*log2H;
171  y -= z*log2L;
172  // exponent
// float -> int32_t conversion of z (z comes out of fpfloor above)
173  int32_t e = z;
174 
175 
176  // we want RN above because it centers the interval around zero
177  // but then we could have 2^e = below being infinity when it shouldn't
178  // (when e=128 but p<1)
179  // so we avoid this case by reducing e and evaluating a polynomial for 2*exp
180  e -=1;
181 
182  // NaN inputs will propagate to the output as expected
183 
184  float p = approx_expf_P<DEGREE>(y);
185 
186  // cout << "x=" << x << " e=" << e << " y=" << y << " p=" << p <<"\n";
// Build the scale factor from its bit pattern: e+127 is shifted left by 23
// and stored through the ui32 member, then read back through the f member.
// NOTE(review): binary32 is listed on this page as a union in approx_math; its
// definition is not shown here -- presumably ui32/f alias the same 32 bits.
// NOTE(review): nothing here limits e+127, so callers must keep x in range
// (approx_expf clamps x before calling) -- confirm for other call sites.
187  binary32 ef;
188  uint32_t biased_exponent= e+127;
189  ef.ui32=(biased_exponent<<23);
190 
191  return p * ef.f;
192 }
#define constexpr
float float float z
double f[11][100]
float fpfloor(float x)
Definition: approx_exp.h:61
Definition: DDAxes.h:10