CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
libminifloat.h
Go to the documentation of this file.
1 #ifndef libminifloat_h
2 #define libminifloat_h
4 #include <cstdint>
5 
6 // Table-based half/single precision conversion; scheme described in ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
8  public:
10  inline static float float16to32(uint16_t h) {
11  union { float flt; uint32_t i32; } conv;
12  conv.i32 = mantissatable[offsettable[h>>10]+(h&0x3ff)]+exponenttable[h>>10];
13  return conv.flt;
14  }
15  inline static uint16_t float32to16(float x) {
16  return float32to16round(x);
17  }
19  inline static uint16_t float32to16crop(float x) {
20  union { float flt; uint32_t i32; } conv;
21  conv.flt = x;
22  return basetable[(conv.i32>>23)&0x1ff]+((conv.i32&0x007fffff)>>shifttable[(conv.i32>>23)&0x1ff]);
23  }
25  inline static uint16_t float32to16round(float x) {
26  union { float flt; uint32_t i32; } conv;
27  conv.flt = x;
28  uint8_t shift = shifttable[(conv.i32>>23)&0x1ff];
29  if (shift == 13) {
30  uint16_t base2 = (conv.i32&0x007fffff)>>12;
31  uint16_t base = base2 >> 1;
32  if (((base2 & 1) != 0) && (base < 1023)) base++;
33  return basetable[(conv.i32>>23)&0x1ff]+base;
34  } else {
35  return basetable[(conv.i32>>23)&0x1ff]+((conv.i32&0x007fffff)>>shifttable[(conv.i32>>23)&0x1ff]);
36  }
37  }
// Truncate the mantissa of a float32 to its `bits` most significant bits.
//
// The sign and exponent fields are left untouched; the (23 - bits) least
// significant mantissa bits are cleared. This is truncation, not rounding.
// Note that a NaN whose payload lies entirely in the cleared bits turns
// into an infinity.
//
// bits: number of mantissa bits to keep, 0..23 (enforced at compile time).
// f:    input value (assumes the usual 32-bit layout with a 23-bit mantissa).
// Returns f with the low mantissa bits zeroed.
template<int bits>
inline static float reduceMantissaToNbits(const float &f)
{
  // Without the lower bound, a small negative `bits` would compile and the
  // mask would silently start clearing exponent bits.
  static_assert(bits >= 0, "cannot keep a negative number of mantissa bits");
  static_assert(bits <= 23, "max mantissa size is 23 bits");
  constexpr uint32_t mask = (0xFFFFFFFF >> (23-bits)) << (23-bits);
  union { float flt; uint32_t i32; } conv;
  conv.flt = f;
  conv.i32 &= mask;
  return conv.flt;
}
48 
// Float32 with bit pattern 0x477fe000 (65504.0f).
// This equals mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e],
// i.e. the float16to32 result for exponent field 0x1e and a full mantissa.
inline static float max() {
  union { uint32_t raw; float value; } pun = { 0x477fe000u };
  return pun.value;
}
54 
// Largest float32 value that still converts to max() in float16.
//
// 2^16 is the first float32 that becomes infinity in float16. Its bit
// pattern is 0x8f << 23, so the pattern one below it is the last float32
// (just under 2^16) that still yields max().
inline static float max32RoundedToMax16() {
  const uint32_t twoPow16Bits = (uint32_t)0x8f << 23;
  union { uint32_t raw; float value; } pun = { twoPow16Bits - 1 };
  return pun.value;
}
63 
// Float32 with bit pattern 0x38800000 (2^-14).
// This equals mantissatable[offsettable[1]+0]+exponenttable[1], i.e. the
// float16to32 result for exponent field 1 and a zero mantissa.
inline static float min() {
  union { uint32_t raw; float value; } pun = { 0x38800000u };
  return pun.value;
}
69 
// Smallest float32 value that converts to min() in float16.
//
// The float32 immediately below 2^-14 already becomes a denormalized
// float16, so 2^-14 itself (bit pattern 0x71 << 23) is the first float32
// that yields min().
inline static float min32RoundedToMin16() {
  const uint32_t twoPowMinus14Bits = (uint32_t)0x71 << 23;
  union { uint32_t raw; float value; } pun = { twoPowMinus14Bits };
  return pun.value;
}
78 
// Float32 with bit pattern 0x33800000 (2^-24).
// This equals mantissatable[offsettable[0]+1]+exponenttable[0], i.e. the
// float16to32 result for exponent field 0 and mantissa 1.
inline static float denorm_min() {
  union { uint32_t raw; float value; } pun = { 0x33800000u };
  return pun.value;
}
84 
// Tell whether a float16 bit pattern encodes a denormalized number.
//
// h: raw float16 bits.
// Returns true when the 5-bit exponent field is zero (the sign bit is
// ignored) and the 10-bit mantissa field is non-zero.
inline static bool isdenorm(uint16_t h) {
  const uint16_t exponentField = (h >> 10) & 0x1f;
  const uint16_t mantissaField = h & 0x3ff;
  if (exponentField != 0) {
    return false;
  }
  return mantissaField != 0;
}
89 
90  private:
91  CMS_THREAD_SAFE static uint32_t mantissatable[2048]; // float16to32: indexed by offsettable[h>>10] + (h & 0x3ff)
92  CMS_THREAD_SAFE static uint32_t exponenttable[64]; // float16to32: indexed by h>>10, added to the mantissatable entry
93  CMS_THREAD_SAFE static uint16_t offsettable[64]; // float16to32: indexed by h>>10, base offset into mantissatable
94  CMS_THREAD_SAFE static uint16_t basetable[512]; // float32to16crop/round: indexed by the top nine bits of the float32 pattern
95  CMS_THREAD_SAFE static uint8_t shifttable[512]; // float32to16crop/round: same index as basetable; right shift applied to the 23-bit mantissa
96  static void filltables() ; // body not in this header; presumably populates the tables above - TODO confirm
97 };
98 #endif
static uint16_t float32to16crop(float x)
Fast implementation, but it crops the number so it biases low.
Definition: libminifloat.h:19
tuple base
Main Program
Definition: newFWLiteAna.py:91
static float min32RoundedToMin16()
Definition: libminifloat.h:71
FWCore Framework interface EventSetupRecordImplementation h
Helper function to determine trigger accepts.
static uint16_t offsettable[64]
Definition: libminifloat.h:93
static HepMC::IO_HEPEVT conv
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
static float float16to32(uint16_t h)
Definition: libminifloat.h:10
static uint16_t basetable[512]
Definition: libminifloat.h:94
#define constexpr
static float denorm_min()
Definition: libminifloat.h:79
static uint8_t shifttable[512]
Definition: libminifloat.h:95
static uint16_t float32to16(float x)
Definition: libminifloat.h:15
static uint32_t mantissatable[2048]
Definition: libminifloat.h:91
double f[11][100]
#define CMS_THREAD_SAFE
static float min()
Definition: libminifloat.h:64
static bool isdenorm(uint16_t h)
Definition: libminifloat.h:85
static float max32RoundedToMax16()
Definition: libminifloat.h:56
static unsigned int const shift
static float max()
Definition: libminifloat.h:49
static void filltables()
Definition: libminifloat.cc:18
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:25
static float reduceMantissaToNbits(const float &f)
Definition: libminifloat.h:39
static uint32_t exponenttable[64]
Definition: libminifloat.h:92