CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Static Private Member Functions | Static Private Attributes
MiniFloatConverter Class Reference

#include <libminifloat.h>

Classes

class  ReduceMantissaToNbitsRounding
 

Public Member Functions

 MiniFloatConverter ()
 

Static Public Member Functions

static float denorm_min ()
 
static float float16to32 (uint16_t h)
 
static uint16_t float32to16 (float x)
 
static uint16_t float32to16crop (float x)
 Fast implementation, but it crops the number so it biases low. More...
 
static uint16_t float32to16round (float x)
 Slower implementation, but it rounds to avoid biases. More...
 
static bool isdenorm (uint16_t h)
 
static float max ()
 
static float max32RoundedToMax16 ()
 
static float min ()
 
static float min32RoundedToMin16 ()
 
template<int bits>
static float reduceMantissaToNbits (const float &f)
 
static float reduceMantissaToNbits (const float &f, int bits)
 
template<int bits>
static float reduceMantissaToNbitsRounding (const float &f)
 
static float reduceMantissaToNbitsRounding (float f, int bits)
 
template<typename InItr , typename OutItr >
static void reduceMantissaToNbitsRounding (int bits, InItr begin, InItr end, OutItr out)
 

Static Private Member Functions

static void filltables ()
 

Static Private Attributes

static uint16_t basetable [512]
 
static uint32_t exponenttable [64]
 
static uint32_t mantissatable [2048]
 
static uint16_t offsettable [64]
 
static uint8_t shifttable [512]
 

Detailed Description

Definition at line 9 of file libminifloat.h.

Constructor & Destructor Documentation

◆ MiniFloatConverter()

MiniFloatConverter::MiniFloatConverter ( )

Definition at line 12 of file libminifloat.cc.

12  {
13  static bool once = false;
14  if (!once) {
15  filltables();
16  once = true;
17  }
18 }

References filltables().

Member Function Documentation

◆ denorm_min()

static float MiniFloatConverter::denorm_min ( )
inlinestatic

Definition at line 158 of file libminifloat.h.

158  {
159  union {
160  float flt;
161  uint32_t i32;
162  } conv;
163  conv.i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0]
164  return conv.flt;
165  }

References conv.

◆ filltables()

void MiniFloatConverter::filltables ( )
staticprivate

Definition at line 20 of file libminifloat.cc.

20  {
21  // ==== mantissatable ===
22  // -- zero --
23  mantissatable[0] = 0;
24  // -- denorm --
25  for (unsigned int i = 1; i <= 1023; ++i) {
26  unsigned int m = (i << 13), e = 0;
27  while (!(m & 0x00800000)) { // While not normalized
28  e -= 0x00800000; // Decrement exponent (1<<23)
29  m <<= 1; // Shift mantissa
30  }
31  m &= ~0x00800000; // Clear leading 1 bit
32  e += 0x38800000; // Adjust bias ((127-14)<<23)
33  mantissatable[i] = m | e;
34  }
35  // -- norm --
36  for (unsigned int i = 1024; i <= 2047; ++i) {
37  mantissatable[i] = 0x38000000 + ((i - 1024) << 13);
38  }
39  // ==== exponenttable ===
40  exponenttable[0] = 0;
41  for (unsigned int i = 1; i <= 30; ++i)
42  exponenttable[i] = i << 23;
43  exponenttable[31] = 0x47800000;
44  exponenttable[32] = 0x80000000u;
45  for (unsigned int i = 33; i <= 62; ++i)
46  exponenttable[i] = 0x80000000u | ((i - 32) << 23);
47  exponenttable[63] = 0xC7800000;
48 
49  // ==== offsettable ====
50  for (unsigned int i = 0; i <= 63; ++i)
51  offsettable[i] = ((i == 0 || i == 32) ? 0 : 1024);
52 
53  // ==== basetable, shifttable ===
54  for (unsigned i = 0; i < 256; ++i) {
55  int e = int(i) - 127;
56  if (e < -24) { // Very small numbers map to zero
57  basetable[i | 0x000] = 0x0000;
58  basetable[i | 0x100] = 0x8000;
59  shifttable[i | 0x000] = 24;
60  shifttable[i | 0x100] = 24;
61  } else if (e < -14) { // Small numbers map to denorms
62  basetable[i | 0x000] = (0x0400 >> (-e - 14));
63  basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
64  shifttable[i | 0x000] = -e - 1;
65  shifttable[i | 0x100] = -e - 1;
66  } else if (e <= 15) { // Normal numbers just lose precision
67  basetable[i | 0x000] = ((e + 15) << 10);
68  basetable[i | 0x100] = ((e + 15) << 10) | 0x8000;
69  shifttable[i | 0x000] = 13;
70  shifttable[i | 0x100] = 13;
71  } else if (e < 128) { // Large numbers map to Infinity
72  basetable[i | 0x000] = 0x7C00;
73  basetable[i | 0x100] = 0xFC00;
74  shifttable[i | 0x000] = 24;
75  shifttable[i | 0x100] = 24;
76  } else { // Infinity and NaN's stay Infinity and NaN's
77  basetable[i | 0x000] = 0x7C00;
78  basetable[i | 0x100] = 0xFC00;
79  shifttable[i | 0x000] = 13;
80  shifttable[i | 0x100] = 13;
81  }
82  }
83 }

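References basetable, exponenttable, mantissatable, offsettable, and shifttable. (The generated links to MillePedeFileConverter_cfg::e, mps_fire::i, createfilelist::int, and visualization-live-secondInstance_cfg::m are spurious: e, i and m are local variables of this function, and int is the built-in type.)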

Referenced by MiniFloatConverter().

◆ float16to32()

static float MiniFloatConverter::float16to32 ( uint16_t  h)
inlinestatic

Definition at line 12 of file libminifloat.h.

12  {
13  union {
14  float flt;
15  uint32_t i32;
16  } conv;
17  conv.i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10];
18  return conv.flt;
19  }

References conv, exponenttable, mantissatable, and offsettable.

Referenced by convertPackedEtaToPackedY(), CompressionElement::unpack(), pat::MET::PackedMETUncertainty::unpack(), pat::PackedGenParticle::unpack(), pat::PackedCandidate::unpack(), BeamCurrentInfo::unpackData(), and pat::PackedCandidate::unpackVtx().

◆ float32to16()

static uint16_t MiniFloatConverter::float32to16 ( float  x)
inlinestatic

◆ float32to16crop()

static uint16_t MiniFloatConverter::float32to16crop ( float  x)
inlinestatic

Fast implementation, but it crops the number so it biases low.

Definition at line 22 of file libminifloat.h.

22  {
23  union {
24  float flt;
25  uint32_t i32;
26  } conv;
27  conv.flt = x;
28  return basetable[(conv.i32 >> 23) & 0x1ff] + ((conv.i32 & 0x007fffff) >> shifttable[(conv.i32 >> 23) & 0x1ff]);
29  }

References basetable, conv, shifttable, and x.

◆ float32to16round()

static uint16_t MiniFloatConverter::float32to16round ( float  x)
inlinestatic

Slower implementation, but it rounds to avoid biases.

Definition at line 31 of file libminifloat.h.

31  {
32  union {
33  float flt;
34  uint32_t i32;
35  } conv;
36  conv.flt = x;
37  uint8_t shift = shifttable[(conv.i32 >> 23) & 0x1ff];
38  if (shift == 13) {
39  uint16_t base2 = (conv.i32 & 0x007fffff) >> 12;
40  uint16_t base = base2 >> 1;
41  if (((base2 & 1) != 0) && (base < 1023))
42  base++;
43  return basetable[(conv.i32 >> 23) & 0x1ff] + base;
44  } else {
45  return basetable[(conv.i32 >> 23) & 0x1ff] + ((conv.i32 & 0x007fffff) >> shifttable[(conv.i32 >> 23) & 0x1ff]);
46  }
47  }

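References basetable and shifttable. (The generated links to newFWLiteAna::base, compare_using_db::base2, conv, edm::shift, and x are spurious: base, base2, conv and shift are local variables of this function, and x is its parameter.)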

Referenced by float32to16().

◆ isdenorm()

static bool MiniFloatConverter::isdenorm ( uint16_t  h)
inlinestatic

Definition at line 167 of file libminifloat.h.

167  {
168  // if exponent is zero (sign-bit excluded of course) and mantissa is not zero
169  return ((h >> 10) & 0x1f) == 0 && (h & 0x3ff) != 0;
170  }

◆ max()

static float MiniFloatConverter::max ( )
inlinestatic

Definition at line 116 of file libminifloat.h.

116  {
117  union {
118  float flt;
119  uint32_t i32;
120  } conv;
121  conv.i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e]
122  return conv.flt;
123  }

References conv.

Referenced by pat::PackedCandidate::pack().

◆ max32RoundedToMax16()

static float MiniFloatConverter::max32RoundedToMax16 ( )
inlinestatic

Definition at line 126 of file libminifloat.h.

126  {
127  union {
128  float flt;
129  uint32_t i32;
130  } conv;
131  // 2^16 in float32 is the first value to result in inf in float16, so the
132  // largest float32 below 2^16 is the last one to result in max() in float16
133  conv.i32 = (0x8f << 23) - 1;
134  return conv.flt;
135  }

References conv.

◆ min()

static float MiniFloatConverter::min ( )
inlinestatic

Definition at line 137 of file libminifloat.h.

137  {
138  union {
139  float flt;
140  uint32_t i32;
141  } conv;
142  conv.i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1]
143  return conv.flt;
144  }

References conv.

◆ min32RoundedToMin16()

static float MiniFloatConverter::min32RoundedToMin16 ( )
inlinestatic

Definition at line 147 of file libminifloat.h.

147  {
148  union {
149  float flt;
150  uint32_t i32;
151  } conv;
152  // the float32 just below 2^-14 already results in a denormalized float16, so
153  // 2^-14 is the first float32 to result in min() in float16
154  conv.i32 = (0x71 << 23);
155  return conv.flt;
156  }

References conv.

◆ reduceMantissaToNbits() [1/2]

template<int bits>
static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f)
inlinestatic

Definition at line 49 of file libminifloat.h.

49  {
50  static_assert(bits <= 23, "max mantissa size is 23 bits");
51  constexpr uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
52  union {
53  float flt;
54  uint32_t i32;
55  } conv;
56  conv.flt = f;
57  conv.i32 &= mask;
58  return conv.flt;
59  }

References conv, and f.

Referenced by CompressionElement::pack().

◆ reduceMantissaToNbits() [2/2]

static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f,
int  bits 
)
inlinestatic

Definition at line 60 of file libminifloat.h.

60  {
61  uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
62  union {
63  float flt;
64  uint32_t i32;
65  } conv;
66  conv.flt = f;
67  conv.i32 &= mask;
68  return conv.flt;
69  }

References conv, and f.

◆ reduceMantissaToNbitsRounding() [1/3]

template<int bits>
static float MiniFloatConverter::reduceMantissaToNbitsRounding ( const float &  f)
inlinestatic

◆ reduceMantissaToNbitsRounding() [2/3]

static float MiniFloatConverter::reduceMantissaToNbitsRounding ( float  f,
int  bits 
)
inlinestatic

Definition at line 107 of file libminifloat.h.

107  {
108  return ReduceMantissaToNbitsRounding(bits)(f);
109  }

References f.

◆ reduceMantissaToNbitsRounding() [3/3]

template<typename InItr , typename OutItr >
static void MiniFloatConverter::reduceMantissaToNbitsRounding ( int  bits,
InItr  begin,
InItr  end,
OutItr  out 
)
inlinestatic

Definition at line 112 of file libminifloat.h.

112  {
113  std::transform(begin, end, out, ReduceMantissaToNbitsRounding(bits));
114  }

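References std::transform(). (The generated links to mps_fire::end, MillePedeFileConverter_cfg::out, and HcalDetIdTransform::transform() are spurious: end and out are parameters of this function, and the call in the listing is to std::transform.)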

Member Data Documentation

◆ basetable

uint16_t MiniFloatConverter::basetable
staticprivate

Definition at line 176 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().

◆ exponenttable

uint32_t MiniFloatConverter::exponenttable
staticprivate

Definition at line 174 of file libminifloat.h.

Referenced by filltables(), and float16to32().

◆ mantissatable

uint32_t MiniFloatConverter::mantissatable
staticprivate

Definition at line 173 of file libminifloat.h.

Referenced by filltables(), and float16to32().

◆ offsettable

uint16_t MiniFloatConverter::offsettable
staticprivate

Definition at line 175 of file libminifloat.h.

Referenced by filltables(), and float16to32().

◆ shifttable

uint8_t MiniFloatConverter::shifttable
staticprivate

Definition at line 177 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().

mps_fire.i
i
Definition: mps_fire.py:428
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
conv
static HepMC::IO_HEPEVT conv
Definition: BeamHaloProducer.cc:48
MiniFloatConverter::float32to16round
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:31
MiniFloatConverter::shifttable
static uint8_t shifttable[512]
Definition: libminifloat.h:177
DDAxes::x
MiniFloatConverter::mantissatable
static uint32_t mantissatable[2048]
Definition: libminifloat.h:173
MiniFloatConverter::basetable
static uint16_t basetable[512]
Definition: libminifloat.h:176
visualization-live-secondInstance_cfg.m
m
Definition: visualization-live-secondInstance_cfg.py:72
mps_fire.end
end
Definition: mps_fire.py:242
HcalDetIdTransform::transform
unsigned transform(const HcalDetId &id, unsigned transformCode)
Definition: HcalDetIdTransform.cc:7
h
MiniFloatConverter::offsettable
static uint16_t offsettable[64]
Definition: libminifloat.h:175
createfilelist.int
int
Definition: createfilelist.py:10
MiniFloatConverter::exponenttable
static uint32_t exponenttable[64]
Definition: libminifloat.h:174
compare_using_db.base2
base2
Definition: compare_using_db.py:175
edm::shift
static unsigned const int shift
Definition: LuminosityBlockID.cc:7
MillePedeFileConverter_cfg.out
out
Definition: MillePedeFileConverter_cfg.py:31
newFWLiteAna.base
base
Definition: newFWLiteAna.py:92
MiniFloatConverter::filltables
static void filltables()
Definition: libminifloat.cc:20
MillePedeFileConverter_cfg.e
e
Definition: MillePedeFileConverter_cfg.py:37