CMS 3D CMS Logo

libminifloat.h
Go to the documentation of this file.
1 #ifndef libminifloat_h
2 #define libminifloat_h
5 #include <cstdint>
6 #include <cassert>
7 #include <algorithm>
8 
9 // ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
11 public:
13  inline static float float16to32(uint16_t h) {
14  uint32_t i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10];
15  return edm::bit_cast<float>(i32);
16  }
17  inline static uint16_t float32to16(float x) { return float32to16round(x); }
19  inline static uint16_t float32to16crop(float x) {
20  uint32_t i32 = edm::bit_cast<uint32_t>(x);
21  return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
22  }
24  inline static uint16_t float32to16round(float x) {
25  uint32_t i32 = edm::bit_cast<uint32_t>(x);
26  uint8_t shift = shifttable[(i32 >> 23) & 0x1ff];
27  if (shift == 13) {
28  uint16_t base2 = (i32 & 0x007fffff) >> 12;
29  uint16_t base = base2 >> 1;
30  if (((base2 & 1) != 0) && (base < 1023))
31  base++;
32  return basetable[(i32 >> 23) & 0x1ff] + base;
33  } else {
34  return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
35  }
36  }
37  template <int bits>
38  inline static float reduceMantissaToNbits(const float &f) {
39  static_assert(bits <= 23, "max mantissa size is 23 bits");
40  constexpr uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
41  uint32_t i32 = edm::bit_cast<uint32_t>(f);
42  i32 &= mask;
43  return edm::bit_cast<float>(i32);
44  }
45  inline static float reduceMantissaToNbits(const float &f, int bits) {
46  uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
47  uint32_t i32 = edm::bit_cast<uint32_t>(f);
48  i32 &= mask;
49  return edm::bit_cast<float>(i32);
50  }
51 
53  public:
// NOTE(review): the constructor's signature line (listing line 58) is missing
// from this extract; only the attribute, initializer list and body are visible.
54 #ifdef CMS_UNDEFINED_SANITIZER
55  // Suppress the UBSan runtime error about a negative shift count. This happens when bits==23:
// shift is then 0, so `test` is initialized from 1 << (shift - 1), i.e. a shift by -1.
56  __attribute__((no_sanitize("shift")))
57 #endif
// shift = number of low mantissa bits dropped; mask clears those bits;
// test selects the highest dropped bit; maxn caps the kept mantissa before increment.
59  : shift(23 - bits), mask((0xFFFFFFFF >> (shift)) << (shift)), test(1 << (shift - 1)), maxn((1 << bits) - 2) {
60  assert(bits <= 23); // "max mantissa size is 23 bits"
61  }
62  float operator()(float f) const {
63  constexpr uint32_t low23 = (0x007FFFFF); // mask to keep lowest 23 bits = mantissa
64  constexpr uint32_t hi9 = (0xFF800000); // mask to keep highest 9 bits = the rest
65  uint32_t i32 = edm::bit_cast<uint32_t>(f);
66  if (i32 & test) { // need to round
67  uint32_t mantissa = (i32 & low23) >> shift;
68  if (mantissa < maxn)
69  mantissa++;
70  i32 = (i32 & hi9) | (mantissa << shift);
71  } else {
72  i32 &= mask;
73  }
74  return edm::bit_cast<float>(i32);
75  }
76 
77  private:
78  const int shift;
79  const uint32_t mask, test, maxn;
80  };
81 
82  template <int bits>
83  inline static float reduceMantissaToNbitsRounding(const float &f) {
84  static const ReduceMantissaToNbitsRounding reducer(bits);
85  return reducer(f);
86  }
87 
// Run-time variant of mantissa rounding: takes the value and the number of
// mantissa bits to keep.
// NOTE(review): the function body (listing line 89) is missing from this
// extract; as shown, nothing is returned. Restore it from the original header.
88  inline static float reduceMantissaToNbitsRounding(float f, int bits) {
90  }
91 
// Range variant of mantissa rounding: takes the number of mantissa bits, an
// input range [begin, end) and an output iterator.
// NOTE(review): the function body (listing line 94) is missing from this
// extract; restore it from the original header before relying on this overload.
92  template <typename InItr, typename OutItr>
93  static void reduceMantissaToNbitsRounding(int bits, InItr begin, InItr end, OutItr out) {
95  }
96 
97  inline static float max() {
98  constexpr uint32_t i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e]
99  return edm::bit_cast<float>(i32);
100  }
101 
102  // Maximum float32 value that gets rounded to max()
103  inline static float max32RoundedToMax16() {
104  // 2^16 in float32 is the first to result inf in float16, so
105  // 2^16-1 is the last float32 to result max() in float16
106  constexpr uint32_t i32 = (0x8f << 23) - 1;
107  return edm::bit_cast<float>(i32);
108  }
109 
110  inline static float min() {
111  constexpr uint32_t i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1]
112  return edm::bit_cast<float>(i32);
113  }
114 
115  // Minimum float32 value that gets rounded to min()
116  inline static float min32RoundedToMin16() {
117  // 2^-14-1 in float32 is the first to result denormalized in float16, so
118  // 2^-14 is the first float32 to result min() in float16
119  constexpr uint32_t i32 = (0x71 << 23);
120  return edm::bit_cast<float>(i32);
121  }
122 
123  inline static float denorm_min() {
124  constexpr uint32_t i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0]
125  return edm::bit_cast<float>(i32);
126  }
127 
// True when the half-precision bit pattern h encodes a denormalized number:
// the 5-bit exponent field is zero (sign bit ignored) while the 10-bit
// mantissa is non-zero.
inline static bool isdenorm(uint16_t h) {
  const unsigned exponent = (h >> 10) & 0x1f;
  const unsigned mantissa = h & 0x3ff;
  if (exponent != 0) {
    return false;
  }
  return mantissa != 0;
}
132 
133 private:
// Lookup tables used by the conversions above.
// half -> float: mantissatable is indexed by offsettable[h >> 10] + (h & 0x3ff),
// exponenttable and offsettable by h >> 10 (sign + exponent bits of the half).
134  CMS_THREAD_SAFE static uint32_t mantissatable[2048];
135  CMS_THREAD_SAFE static uint32_t exponenttable[64];
136  CMS_THREAD_SAFE static uint16_t offsettable[64];
// float -> half: both tables are indexed by (i32 >> 23) & 0x1ff (sign + exponent
// bits of the float); basetable gives the half's base bits, shifttable the number
// of mantissa bits to shift out.
137  CMS_THREAD_SAFE static uint16_t basetable[512];
138  CMS_THREAD_SAFE static uint8_t shifttable[512];
// Defined outside this header; presumably populates the tables above — confirm in the .cc file.
139  static void filltables();
140 };
141 #endif
static uint16_t float32to16crop(float x)
Fast implementation, but it crops the number so it biases low.
Definition: libminifloat.h:19
static float min32RoundedToMin16()
Definition: libminifloat.h:116
static uint16_t offsettable[64]
Definition: libminifloat.h:136
assert(be >=bs)
static float float16to32(uint16_t h)
Definition: libminifloat.h:13
static uint16_t basetable[512]
Definition: libminifloat.h:137
static float denorm_min()
Definition: libminifloat.h:123
static uint8_t shifttable[512]
Definition: libminifloat.h:138
static uint16_t float32to16(float x)
Definition: libminifloat.h:17
float __attribute__((vector_size(8))) cms_float32x2_t
Definition: ExtVec.h:8
static uint32_t mantissatable[2048]
Definition: libminifloat.h:134
static float reduceMantissaToNbits(const float &f, int bits)
Definition: libminifloat.h:45
double f[11][100]
#define CMS_THREAD_SAFE
static float min()
Definition: libminifloat.h:110
static float reduceMantissaToNbitsRounding(float f, int bits)
Definition: libminifloat.h:88
static bool isdenorm(uint16_t h)
Definition: libminifloat.h:128
static float reduceMantissaToNbitsRounding(const float &f)
Definition: libminifloat.h:83
static float max32RoundedToMax16()
Definition: libminifloat.h:103
static unsigned int const shift
static float max()
Definition: libminifloat.h:97
static void filltables()
Definition: libminifloat.cc:20
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:24
static float reduceMantissaToNbits(const float &f)
Definition: libminifloat.h:38
static uint32_t exponenttable[64]
Definition: libminifloat.h:135
static void reduceMantissaToNbitsRounding(int bits, InItr begin, InItr end, OutItr out)
Definition: libminifloat.h:93
unsigned transform(const HcalDetId &id, unsigned transformCode)