CMS 3D CMS Logo

List of all members | Public Member Functions | Static Public Member Functions | Static Private Member Functions | Static Private Attributes
MiniFloatConverter Class Reference

#include <libminifloat.h>

Public Member Functions

 MiniFloatConverter ()
 

Static Public Member Functions

static float denorm_min ()
 
static float float16to32 (uint16_t h)
 
static uint16_t float32to16 (float x)
 
static uint16_t float32to16crop (float x)
 Fast implementation, but it crops the number so it biases low. More...
 
static uint16_t float32to16round (float x)
 Slower implementation, but it rounds to avoid biases. More...
 
static bool isdenorm (uint16_t h)
 
static float max ()
 
static float max32RoundedToMax16 ()
 
static float min ()
 
static float min32RoundedToMin16 ()
 
template<int bits>
static float reduceMantissaToNbits (const float &f)
 
static float reduceMantissaToNbits (const float &f, int bits)
 
template<int bits>
static float reduceMantissaToNbitsRounding (const float &f)
 

Static Private Member Functions

static void filltables ()
 

Static Private Attributes

static uint16_t basetable [512]
 
static uint32_t exponenttable [64]
 
static uint32_t mantissatable [2048]
 
static uint16_t offsettable [64]
 
static uint8_t shifttable [512]
 

Detailed Description

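Converts between 32-bit floats and a 16-bit floating-point encoding (1 sign bit, 5 exponent bits, 10 mantissa bits) using lookup tables that filltables() populates, and provides helpers that reduce the mantissa of a float to a given number of bits.

Definition at line 7 of file libminifloat.h.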

Constructor & Destructor Documentation

MiniFloatConverter::MiniFloatConverter ( )

Definition at line 13 of file libminifloat.cc.

References filltables().

13  {
14  static bool once = false;
15  if (!once) { filltables(); once = true; }
16 }
static void filltables()
Definition: libminifloat.cc:18

Member Function Documentation

static float MiniFloatConverter::denorm_min ( )
inlinestatic

Definition at line 109 of file libminifloat.h.

References conv.

109  {
110  union { float flt; uint32_t i32; } conv;
111  conv.i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0]
112  return conv.flt;
113  }
static HepMC::IO_HEPEVT conv
void MiniFloatConverter::filltables ( )
staticprivate

Definition at line 18 of file libminifloat.cc.

References basetable, MillePedeFileConverter_cfg::e, exponenttable, mps_fire::i, createfilelist::int, funct::m, mantissatable, offsettable, and shifttable.

Referenced by MiniFloatConverter().

18  {
19  // ==== mantissatable ===
20  // -- zero --
21  mantissatable[0] = 0;
22  // -- denorm --
23  for (unsigned int i = 1; i <= 1023; ++i) {
24  unsigned int m =(i<<13), e=0;
25  while(!(m&0x00800000)){ // While not normalized
26  e-=0x00800000; // Decrement exponent (1<<23)
27  m<<=1; // Shift mantissa
28  }
29  m&=~0x00800000; // Clear leading 1 bit
30  e+= 0x38800000; // Adjust bias ((127-14)<<23)
31  mantissatable[i] = m | e;
32  }
33  // -- norm --
34  for (unsigned int i = 1024; i <= 2047; ++i) {
35  mantissatable[i] = 0x38000000 + ((i-1024)<<13);
36  }
37  // ==== exponenttable ===
38  exponenttable[0] = 0;
39  for (unsigned int i = 1; i <= 30; ++i) exponenttable[i] = i<<23;
40  exponenttable[31] = 0x47800000;
41  exponenttable[32] = 0x80000000u;
42  for (unsigned int i = 33; i <= 62; ++i) exponenttable[i] = 0x80000000u | ((i-32)<<23);
43  exponenttable[63] = 0xC7800000;
44 
45  // ==== offsettable ====
46  for (unsigned int i = 0; i <= 63; ++i) offsettable[i] = ((i == 0 || i == 32) ? 0 : 1024);
47 
48  // ==== basetable, shifttable ===
49  for (unsigned i=0; i<256; ++i){
50  int e = int(i)-127;
51  if(e<-24){ // Very small numbers map to zero
52  basetable[i|0x000]=0x0000;
53  basetable[i|0x100]=0x8000;
54  shifttable[i|0x000]=24;
55  shifttable[i|0x100]=24;
56  }
57  else if(e<-14){ // Small numbers map to denorms
58  basetable[i|0x000]=(0x0400>>(-e-14));
59  basetable[i|0x100]=(0x0400>>(-e-14)) | 0x8000;
60  shifttable[i|0x000]=-e-1;
61  shifttable[i|0x100]=-e-1;
62  }
63  else if(e<=15){ // Normal numbers just lose precision
64  basetable[i|0x000]=((e+15)<<10);
65  basetable[i|0x100]=((e+15)<<10) | 0x8000;
66  shifttable[i|0x000]=13;
67  shifttable[i|0x100]=13;
68  }
69  else if(e<128){ // Large numbers map to Infinity
70  basetable[i|0x000]=0x7C00;
71  basetable[i|0x100]=0xFC00;
72  shifttable[i|0x000]=24;
73  shifttable[i|0x100]=24;
74  }
75  else{ // Infinity and NaN's stay Infinity and NaN's
76  basetable[i|0x000]=0x7C00;
77  basetable[i|0x100]=0xFC00;
78  shifttable[i|0x000]=13;
79  shifttable[i|0x100]=13;
80  }
81  }
82 }
static uint16_t offsettable[64]
Definition: libminifloat.h:123
static uint16_t basetable[512]
Definition: libminifloat.h:124
static uint8_t shifttable[512]
Definition: libminifloat.h:125
static uint32_t mantissatable[2048]
Definition: libminifloat.h:121
static uint32_t exponenttable[64]
Definition: libminifloat.h:122
static float MiniFloatConverter::float16to32 ( uint16_t  h)
inlinestatic

Definition at line 10 of file libminifloat.h.

References conv, exponenttable, mantissatable, and offsettable.

Referenced by convertPackedEtaToPackedY(), CompressionElement::unpack(), pat::MET::PackedMETUncertainty::unpack(), pat::PackedGenParticle::unpack(), pat::PackedCandidate::unpack(), BeamCurrentInfo::unpackData(), and pat::PackedCandidate::unpackVtx().

10  {
11  union { float flt; uint32_t i32; } conv;
12  conv.i32 = mantissatable[offsettable[h>>10]+(h&0x3ff)]+exponenttable[h>>10];
13  return conv.flt;
14  }
static uint16_t offsettable[64]
Definition: libminifloat.h:123
static HepMC::IO_HEPEVT conv
static uint32_t mantissatable[2048]
Definition: libminifloat.h:121
static uint32_t exponenttable[64]
Definition: libminifloat.h:122
static uint16_t MiniFloatConverter::float32to16 ( float  x)
inlinestatic

Definition at line 15 of file libminifloat.h.

References float32to16round().

Referenced by CompressionElement::pack(), pat::MET::PackedMETUncertainty::pack(), pat::PackedGenParticle::pack(), pat::PackedCandidate::pack(), BeamCurrentInfo::packData(), and pat::PackedCandidate::packVtx().

15  {
16  return float32to16round(x);
17  }
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:25
static uint16_t MiniFloatConverter::float32to16crop ( float  x)
inlinestatic

Fast implementation, but it truncates the mantissa bits that do not fit (the sign is handled separately), so the magnitude of the result is biased toward zero.

Definition at line 19 of file libminifloat.h.

References basetable, conv, shifttable, and x.

19  {
20  union { float flt; uint32_t i32; } conv;
21  conv.flt = x;
22  return basetable[(conv.i32>>23)&0x1ff]+((conv.i32&0x007fffff)>>shifttable[(conv.i32>>23)&0x1ff]);
23  }
static HepMC::IO_HEPEVT conv
static uint16_t basetable[512]
Definition: libminifloat.h:124
static uint8_t shifttable[512]
Definition: libminifloat.h:125
static uint16_t MiniFloatConverter::float32to16round ( float  x)
inlinestatic

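Slower implementation, but it rounds the mantissa instead of truncating it to avoid biases. Rounding is applied only where shifttable gives 13 (per filltables(): the float16 normal-range exponents, plus Inf/NaN), and the 10-bit mantissa is never rounded up past 1023, so rounding does not carry into the exponent. All other inputs take the same truncating path as float32to16crop().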

Definition at line 25 of file libminifloat.h.

References runEdmFileComparison::base, compare_using_db::base2, basetable, conv, edm::shift, shifttable, and x.

Referenced by float32to16().

25  {
26  union { float flt; uint32_t i32; } conv;
27  conv.flt = x;
28  uint8_t shift = shifttable[(conv.i32>>23)&0x1ff];
29  if (shift == 13) {
30  uint16_t base2 = (conv.i32&0x007fffff)>>12;
31  uint16_t base = base2 >> 1;
32  if (((base2 & 1) != 0) && (base < 1023)) base++;
33  return basetable[(conv.i32>>23)&0x1ff]+base;
34  } else {
35  return basetable[(conv.i32>>23)&0x1ff]+((conv.i32&0x007fffff)>>shifttable[(conv.i32>>23)&0x1ff]);
36  }
37  }
static HepMC::IO_HEPEVT conv
static uint16_t basetable[512]
Definition: libminifloat.h:124
static uint8_t shifttable[512]
Definition: libminifloat.h:125
base
Make Sure CMSSW is Setup ##.
static unsigned int const shift
static bool MiniFloatConverter::isdenorm ( uint16_t  h)
inlinestatic

Definition at line 115 of file libminifloat.h.

115  {
116  // if exponent is zero (sign-bit excluded of course) and mantissa is not zero
117  return ((h >> 10) & 0x1f) == 0 && (h & 0x3ff) != 0;
118  }
static float MiniFloatConverter::max ( )
inlinestatic

Definition at line 79 of file libminifloat.h.

References conv.

79  {
80  union { float flt; uint32_t i32; } conv;
81  conv.i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e]
82  return conv.flt;
83  }
static HepMC::IO_HEPEVT conv
static float MiniFloatConverter::max32RoundedToMax16 ( )
inlinestatic

Definition at line 86 of file libminifloat.h.

References conv.

86  {
87  union { float flt; uint32_t i32; } conv;
88  // 2^16 in float32 is the first value that converts to inf in float16, so the
89  // largest float32 below 2^16 (the bit pattern of 2^16 minus one) is the last float32 that converts to max() in float16
90  conv.i32 = (0x8f<<23) - 1;
91  return conv.flt;
92  }
static HepMC::IO_HEPEVT conv
static float MiniFloatConverter::min ( )
inlinestatic

Definition at line 94 of file libminifloat.h.

References conv.

94  {
95  union { float flt; uint32_t i32; } conv;
96  conv.i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1]
97  return conv.flt;
98  }
static HepMC::IO_HEPEVT conv
static float MiniFloatConverter::min32RoundedToMin16 ( )
inlinestatic

Definition at line 101 of file libminifloat.h.

References conv.

101  {
102  union { float flt; uint32_t i32; } conv;
103  // the float32 just below 2^-14 is the largest value that converts to a denormalized float16, so
104  // 2^-14 is the smallest float32 that converts to min() in float16
105  conv.i32 = (0x71<<23);
106  return conv.flt;
107  }
static HepMC::IO_HEPEVT conv
template<int bits>
static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f)
inlinestatic

Definition at line 39 of file libminifloat.h.

References bits, constexpr, conv, f, and RecoTauDiscriminantConfiguration::mask.

Referenced by CompressionElement::pack().

40  {
41  static_assert(bits <= 23,"max mantissa size is 23 bits");
42  constexpr uint32_t mask = (0xFFFFFFFF >> (23-bits)) << (23-bits);
43  union { float flt; uint32_t i32; } conv;
44  conv.flt=f;
45  conv.i32&=mask;
46  return conv.flt;
47  }
static HepMC::IO_HEPEVT conv
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
#define constexpr
double f[11][100]
static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f,
int  bits 
)
inlinestatic

Definition at line 48 of file libminifloat.h.

References bits, conv, f, and RecoTauDiscriminantConfiguration::mask.

49  {
50  uint32_t mask = (0xFFFFFFFF >> (23-bits)) << (23-bits);
51  union { float flt; uint32_t i32; } conv;
52  conv.flt=f;
53  conv.i32&=mask;
54  return conv.flt;
55  }
static HepMC::IO_HEPEVT conv
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
double f[11][100]
template<int bits>
static float MiniFloatConverter::reduceMantissaToNbitsRounding ( const float &  f)
inlinestatic

Definition at line 58 of file libminifloat.h.

References bits, constexpr, conv, f, RecoTauDiscriminantConfiguration::mask, and edm::shift.

59  {
60  static_assert(bits <= 23,"max mantissa size is 23 bits");
61  constexpr int shift = (23-bits); // bits I throw away
62  constexpr uint32_t mask = (0xFFFFFFFF >> (shift)) << (shift); // mask for truncation
63  constexpr uint32_t test = 1 << (shift-1); // most significant bit I throw away
64  constexpr uint32_t low23 = (0x007FFFFF); // mask to keep lowest 23 bits = mantissa
65  constexpr uint32_t hi9 = (0xFF800000); // mask to keep highest 9 bits = the rest
66  constexpr uint32_t maxn = (1<<bits)-2; // max number I can increase before overflowing
67  union { float flt; uint32_t i32; } conv;
68  conv.flt=f;
69  if (conv.i32 & test) { // need to round
70  uint32_t mantissa = (conv.i32 & low23) >> shift;
71  if (mantissa < maxn) mantissa++;
72  conv.i32 = (conv.i32 & hi9) | (mantissa << shift);
73  } else {
74  conv.i32 &= mask;
75  }
76  return conv.flt;
77  }
static HepMC::IO_HEPEVT conv
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
#define constexpr
double f[11][100]
static unsigned int const shift

Member Data Documentation

uint16_t MiniFloatConverter::basetable
staticprivate

Definition at line 124 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().

uint32_t MiniFloatConverter::exponenttable
staticprivate

Definition at line 122 of file libminifloat.h.

Referenced by filltables(), and float16to32().

uint32_t MiniFloatConverter::mantissatable
staticprivate

Definition at line 121 of file libminifloat.h.

Referenced by filltables(), and float16to32().

uint16_t MiniFloatConverter::offsettable
staticprivate

Definition at line 123 of file libminifloat.h.

Referenced by filltables(), and float16to32().

uint8_t MiniFloatConverter::shifttable
staticprivate

Definition at line 125 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().