CMS 3D CMS Logo

libminifloat.h
Go to the documentation of this file.
1 #ifndef libminifloat_h
2 #define libminifloat_h
5 #include <cstdint>
6 #include <cassert>
7 #include <algorithm>
8 
9 // ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
11 public:
13  inline static float float16to32(uint16_t h) {
14  uint32_t i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10];
15  return edm::bit_cast<float>(i32);
16  }
17  inline static uint16_t float32to16(float x) { return float32to16round(x); }
19  inline static uint16_t float32to16crop(float x) {
20  uint32_t i32 = edm::bit_cast<uint32_t>(x);
21  return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
22  }
24  inline static uint16_t float32to16round(float x) {
25  uint32_t i32 = edm::bit_cast<uint32_t>(x);
26  uint8_t shift = shifttable[(i32 >> 23) & 0x1ff];
27  if (shift == 13) {
28  uint16_t base2 = (i32 & 0x007fffff) >> 12;
29  uint16_t base = base2 >> 1;
30  if (((base2 & 1) != 0) && (base < 1023))
31  base++;
32  return basetable[(i32 >> 23) & 0x1ff] + base;
33  } else {
34  return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
35  }
36  }
37  template <int bits>
38  inline static float reduceMantissaToNbits(const float &f) {
39  static_assert(bits <= 23, "max mantissa size is 23 bits");
40  constexpr uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
41  uint32_t i32 = edm::bit_cast<uint32_t>(f);
42  i32 &= mask;
43  return edm::bit_cast<float>(i32);
44  }
45  inline static float reduceMantissaToNbits(const float &f, int bits) {
46  uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
47  uint32_t i32 = edm::bit_cast<uint32_t>(f);
48  i32 &= mask;
49  return edm::bit_cast<float>(i32);
50  }
51 
// Function object that reduces a float32 mantissa to `bits` bits, rounding up
// when the highest dropped bit is set.
// NOTE(review): listing lines 52 and 54 are absent from this extract; they
// presumably hold the class header and the constructor signature taking
// `bits` -- confirm against the original header.
53  public:
// Member initialisers:
//   shift = 23 - bits        number of low mantissa bits that are dropped
//   mask                     clears those low `shift` bits
//   test  = 1 << (shift-1)   selects the highest dropped bit
//   maxn  = 2^bits - 2       mantissa values >= maxn are never rounded up
// NOTE(review): these run before the assert in the body, and for bits == 23
// `shift` is 0, so `test` is computed with a shift by -1 -- looks like
// bits == 23 is not actually usable here; confirm.
55  : shift(23 - bits), mask((0xFFFFFFFF >> (shift)) << (shift)), test(1 << (shift - 1)), maxn((1 << bits) - 2) {
56  assert(bits <= 23); // "max mantissa size is 23 bits"
57  }
// Apply the reduction to one value and return the result as a float.
58  float operator()(float f) const {
59  constexpr uint32_t low23 = (0x007FFFFF); // mask to keep lowest 23 bits = mantissa
60  constexpr uint32_t hi9 = (0xFF800000); // mask to keep highest 9 bits = the rest
61  uint32_t i32 = edm::bit_cast<uint32_t>(f);
62  if (i32 & test) { // need to round
// Work on the kept mantissa bits only (dropped bits shifted out).
63  uint32_t mantissa = (i32 & low23) >> shift;
// Increment only below maxn, so the kept mantissa can never overflow into the exponent.
64  if (mantissa < maxn)
65  mantissa++;
// Reassemble: sign + exponent unchanged, kept mantissa shifted back, dropped bits zero.
66  i32 = (i32 & hi9) | (mantissa << shift);
67  } else {
// Highest dropped bit is clear: plain truncation.
68  i32 &= mask;
69  }
70  return edm::bit_cast<float>(i32);
71  }
72 
73  private:
// Values fixed at construction; see the member initialisers for their meaning.
74  const int shift;
75  const uint32_t mask, test, maxn;
76  };
77 
78  template <int bits>
79  inline static float reduceMantissaToNbitsRounding(const float &f) {
80  static const ReduceMantissaToNbitsRounding reducer(bits);
81  return reducer(f);
82  }
83 
// Rounding mantissa reduction with the width `bits` given at run time.
// NOTE(review): the listing jumps from line 84 to line 86, so the statement
// that forms this function's body is not visible in this extract; as shown,
// the function returns nothing. Restore the body from the original header.
84  inline static float reduceMantissaToNbitsRounding(float f, int bits) {
86  }
87 
// Range overload: takes a mantissa width, an input iterator range [begin, end)
// and an output iterator.
// NOTE(review): the listing jumps from line 89 to line 91, so the statement
// that forms this function's body is not visible in this extract; as shown,
// the function does nothing. Restore the body from the original header.
88  template <typename InItr, typename OutItr>
89  static void reduceMantissaToNbitsRounding(int bits, InItr begin, InItr end, OutItr out) {
91  }
92 
93  inline static float max() {
94  constexpr uint32_t i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e]
95  return edm::bit_cast<float>(i32);
96  }
97 
98  // Maximum float32 value that gets rounded to max()
99  inline static float max32RoundedToMax16() {
100  // 2^16 in float32 is the first to result inf in float16, so
101  // 2^16-1 is the last float32 to result max() in float16
102  constexpr uint32_t i32 = (0x8f << 23) - 1;
103  return edm::bit_cast<float>(i32);
104  }
105 
106  inline static float min() {
107  constexpr uint32_t i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1]
108  return edm::bit_cast<float>(i32);
109  }
110 
111  // Minimum float32 value that gets rounded to min()
112  inline static float min32RoundedToMin16() {
113  // 2^-14-1 in float32 is the first to result denormalized in float16, so
114  // 2^-14 is the first float32 to result min() in float16
115  constexpr uint32_t i32 = (0x71 << 23);
116  return edm::bit_cast<float>(i32);
117  }
118 
119  inline static float denorm_min() {
120  constexpr uint32_t i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0]
121  return edm::bit_cast<float>(i32);
122  }
123 
// Tell whether a 16-bit half-precision bit pattern encodes a denormalized
// number: exponent field all zero and mantissa field non-zero. The sign bit
// is ignored. Zero (either sign), normal values, inf and NaN all return false.
// constexpr so it can also be evaluated at compile time.
inline static constexpr bool isdenorm(uint16_t h) {
  const unsigned exponent = (h >> 10) & 0x1f;  // 5-bit exponent, sign bit masked off
  const unsigned mantissa = h & 0x3ff;         // 10-bit mantissa
  return exponent == 0 && mantissa != 0;
}
128 
129 private:
// Lookup tables used by the conversions in this class. CMS_THREAD_SAFE is a
// project macro whose definition is not part of this extract.
//
// float16 -> float32 (float16to32): mantissatable is indexed by
// offsettable[h >> 10] + (h & 0x3ff); exponenttable and offsettable are
// indexed by h >> 10, i.e. the top 6 bits of the half (hence 64 entries).
130  CMS_THREAD_SAFE static uint32_t mantissatable[2048];
131  CMS_THREAD_SAFE static uint32_t exponenttable[64];
132  CMS_THREAD_SAFE static uint16_t offsettable[64];
// float32 -> float16 (float32to16crop / float32to16round): both tables are
// indexed by (i32 >> 23) & 0x1ff, i.e. the top 9 bits of the float32
// (hence 512 entries). basetable supplies the base half pattern, shifttable
// the right-shift applied to the 23-bit mantissa.
133  CMS_THREAD_SAFE static uint16_t basetable[512];
134  CMS_THREAD_SAFE static uint8_t shifttable[512];
// Declared only; the definition is not in this extract. Presumably fills the
// tables above -- confirm in the implementation file.
135  static void filltables();
136 };
137 #endif
static uint16_t float32to16crop(float x)
Fast implementation, but it crops the number so it biases low.
Definition: libminifloat.h:19
static float min32RoundedToMin16()
Definition: libminifloat.h:112
static uint16_t offsettable[64]
Definition: libminifloat.h:132
To bit_cast(const From &src) noexcept
Definition: bit_cast.h:29
base
Main Program
Definition: newFWLiteAna.py:92
constexpr uint32_t bits
Definition: gpuClustering.h:25
assert(be >=bs)
static float float16to32(uint16_t h)
Definition: libminifloat.h:13
static uint16_t basetable[512]
Definition: libminifloat.h:133
constexpr uint32_t mask
Definition: gpuClustering.h:26
static float denorm_min()
Definition: libminifloat.h:119
static uint8_t shifttable[512]
Definition: libminifloat.h:134
static uint16_t float32to16(float x)
Definition: libminifloat.h:17
static uint32_t mantissatable[2048]
Definition: libminifloat.h:130
static float reduceMantissaToNbits(const float &f, int bits)
Definition: libminifloat.h:45
double f[11][100]
#define CMS_THREAD_SAFE
static float min()
Definition: libminifloat.h:106
static float reduceMantissaToNbitsRounding(float f, int bits)
Definition: libminifloat.h:84
static bool isdenorm(uint16_t h)
Definition: libminifloat.h:124
static float reduceMantissaToNbitsRounding(const float &f)
Definition: libminifloat.h:79
static float max32RoundedToMax16()
Definition: libminifloat.h:99
static unsigned int const shift
static float max()
Definition: libminifloat.h:93
static void filltables()
Definition: libminifloat.cc:20
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:24
static float reduceMantissaToNbits(const float &f)
Definition: libminifloat.h:38
static uint32_t exponenttable[64]
Definition: libminifloat.h:131
static void reduceMantissaToNbitsRounding(int bits, InItr begin, InItr end, OutItr out)
Definition: libminifloat.h:89
unsigned transform(const HcalDetId &id, unsigned transformCode)