#include "DataFormats/Math/interface/approx_math.h"

Functions
template<int DEGREE>
constexpr float	approx_expf (float x)

template<int DEGREE>
constexpr float	approx_expf_P (float p)

template<>
constexpr float	approx_expf_P< 2 > (float y)

template<>
constexpr float	approx_expf_P< 3 > (float y)

template<>
constexpr float	approx_expf_P< 4 > (float y)

template<>
constexpr float	approx_expf_P< 5 > (float y)

template<>
constexpr float	approx_expf_P< 6 > (float y)

template<>
constexpr float	approx_expf_P< 7 > (float y)

template<int DEGREE>
constexpr float	unsafe_expf (float x)

template<int DEGREE>
constexpr float	unsafe_expf_impl (float x)

Function Documentation

template<int DEGREE>

constexpr float approx_expf ( float x )

Definition at line 176 of file approx_exp.h.

References SiStripPI::max, min(), alignCSCRings::r, and gpuClustering::x.

                                      {
   constexpr float inf_threshold = float(0x5.8b90cp4);
   // log of the smallest normal
   constexpr float zero_threshold_ftz = -float(0x5.75628p4);  // sollya: single(log(1b-126));
   // flush to zero on the output
   // manage infty output:
   // faster than directly on output!
   x = std::min(std::max(x, zero_threshold_ftz), inf_threshold);
   float r = unsafe_expf<DEGREE>(x);
 
   return r;
 }

template<int DEGREE>

constexpr float approx_expf_P ( float p )

template<>

constexpr float approx_expf_P< 2 > ( float y )

Definition at line 43 of file approx_exp.h.

References detailsBasic3DVector::y.

                                           {
   // Degree-2 polynomial in y, written in nested (Horner) form.
   // The constant term is 2 (0x2.p0): the caller unsafe_expf_impl lowers the
   // exponent by one and notes that it evaluates a polynomial for 2*exp.
   return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0));
 }

template<>

constexpr float approx_expf_P< 3 > ( float y )

Definition at line 48 of file approx_exp.h.

References detailsBasic3DVector::y.

                                           {
   // Degree-3 polynomial in y with constant term 2 (0x2.p0).
   // Two evaluation schemes over the same four coefficients, chosen at
   // compile time by whether the HORNER macro is defined.
 #ifdef HORNER  // HORNER
   // Fully nested form: c0 + y*(c1 + y*(c2 + y*c3)).
   return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4)));
 #else  // ESTRIN
   // Pair the terms: p23 = c2 + y*c3 and p01 = c0 + y*c1 do not depend on
   // each other; they are combined as p01 + y^2 * p23.
   float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4));
   float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
   return p01 + y * y * p23;
 #endif
 }

template<>

constexpr float approx_expf_P< 4 > ( float y )

Definition at line 59 of file approx_exp.h.

References detailsBasic3DVector::y.

                                           {
   // Degree-4 polynomial in y with constant term 2 (0x2.p0), evaluated in
   // nested (Horner) form from the highest-order coefficient inwards.
   return float(0x2.p0) +
          y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4))));
 }

template<>

constexpr float approx_expf_P< 5 > ( float y )

Definition at line 65 of file approx_exp.h.

References detailsBasic3DVector::y.

                                           {
   // Degree-5 polynomial in y, evaluated in nested (Horner) form.
   // Both the constant and the linear coefficient are exactly 2 (0x2.p0).
   return float(0x2.p0) +
          y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) +
                                    y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8)))));
 }

template<>

constexpr float approx_expf_P< 6 > ( float y )

Definition at line 72 of file approx_exp.h.

References AlCaHLTBitMon_ParallelJobs::p, and detailsBasic3DVector::y.

                                           {
   // Degree-6 polynomial in y with constant term 2 (0x2.p0).
   // Two evaluation schemes, chosen at compile time by whether the HORNER
   // macro is defined; both produce the value returned in p.
 #ifdef HORNER  // HORNER
   // Fully nested form, innermost term is the y^6 coefficient.
   float p =
       float(0x2.p0) +
       y * (float(0x2.p0) +
            y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) +
                                                                 y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12))))));
 #else  // ESTRIN does seem to save a cycle or two
   // Pairwise grouping: p56 and p34 each combine two adjacent coefficients
   // and are independent of each other and of y2.
   float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
   float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
   float y2 = y * y;
   // The y^1 and y^2 coefficients are 2 and 1 (see the HORNER branch), so the
   // pair reduces to 2 + y with no multiplication.
   float p12 = float(0x2.p0) + y;  // By chance we save one operation here! Funny.
   // Fold the pairs together with powers of y^2, then apply the outer
   // "2 + y * (...)" step.
   float p36 = p34 + y2 * p56;
   float p16 = p12 + y2 * p36;
   float p = float(0x2.p0) + y * p16;
 #endif
   return p;
 }

template<>

constexpr float approx_expf_P< 7 > ( float y )

Definition at line 93 of file approx_exp.h.

References detailsBasic3DVector::y.

                                           {
   // Degree-7 polynomial in y, evaluated in nested (Horner) form.
   // The three lowest-order coefficients are exactly 2, 2 and 1.
   return float(0x2.p0) +
          y * (float(0x2.p0) +
               y * (float(0x1.p0) +
                    y * (float(0x5.55555p-4) +
                         y * (float(0x1.5554e4p-4) +
                              y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12)))))));
 }

template<int DEGREE>

constexpr float unsafe_expf ( float x )

Definition at line 171 of file approx_exp.h.

References gpuClustering::x.

                                      {
   // Forwards x unchanged to unsafe_expf_impl for the same DEGREE.
   // No clamping of x happens here; approx_expf clamps before calling this.
   return unsafe_expf_impl<DEGREE>(x);
 }

template<int DEGREE>

constexpr float unsafe_expf_impl ( float x )

Definition at line 125 of file approx_exp.h.

References alignCSCRings::e, validate-o2o-wbm::f, approx_math::fpfloor(), and AlCaHLTBitMon_ParallelJobs::p.

                                           {
   // Computes the result as p * 2^e, where e comes from rounding x/log(2)
   // and p is the DEGREE polynomial evaluated on the reduced argument y.
   // x is not range-checked here.
   using namespace approx_math;
   /* Sollya for the following constants:
      display=hexadecimal;
      1b23+1b22;
      single(1/log(2));
      log2H=round(log(2), 16, RN);
      log2L = single(log(2)-log2H);
      log2H; log2L;
      
   */
   // constexpr float rnd_cst = float(0xc.p20);
   // 1/log(2) in single precision, and log(2) split into a high part (log2H)
   // and a low correction (log2L), per the Sollya script above.
   constexpr float inv_log2f = float(0x1.715476p0);
   constexpr float log2H = float(0xb.172p-4);
   constexpr float log2L = float(0x1.7f7d1cp-20);
 
   float y = x;
   // This is doing round(x*inv_log2f) to the nearest integer
   // (implemented as floor(value + 0.5) through approx_math::fpfloor).
   float z = fpfloor((x * inv_log2f) + 0.5f);
   // Cody-and-Waite accurate range reduction. FMA-safe.
   // y = x - z*log(2), subtracting the high and low parts of log(2) separately.
   y -= z * log2H;
   y -= z * log2L;
   // exponent: z holds an integral value, converted here to a signed integer
   int32_t e = z;
 
   // We want round-to-nearest above because it centers the reduced interval
   // around zero. The scale factor 2^e built below could then be infinity
   // when the final result should not be (when e=128 but p<1).
   // We avoid this case by reducing e by one and evaluating a polynomial
   // for 2*exp instead.
   e -= 1;
 
   // NaN inputs will propagate to the output as expected
 
   float p = approx_expf_P<DEGREE>(y);
 
   // cout << "x=" << x << "  e=" << e << "  y=" << y << "  p=" << p <<"\n";
   // Build the scale factor: add the bias 127 to e and shift it left by 23 bits
   // into the integer view of ef, then read it back through the float view.
   // NOTE(review): binary32 is declared in approx_math and is presumably a
   // union over a 32-bit integer (ui32) and a float (f) - confirm there.
   binary32 ef;
   uint32_t biased_exponent = e + 127;
   ef.ui32 = (biased_exponent << 23);
 
   return p * ef.f;
 }

Functions

Function Documentation