CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Static Private Member Functions | Static Private Attributes
MiniFloatConverter Class Reference

#include <libminifloat.h>

Classes

class  ReduceMantissaToNbitsRounding
 

Public Member Functions

 MiniFloatConverter ()
 

Static Public Member Functions

static float denorm_min ()
 
static float float16to32 (uint16_t h)
 
static uint16_t float32to16 (float x)
 
static uint16_t float32to16crop (float x)
 Fast implementation, but it truncates the extra mantissa bits, so results are biased toward zero. More...
 
static uint16_t float32to16round (float x)
 Slower implementation, but it rounds to avoid biases. More...
 
static bool isdenorm (uint16_t h)
 
static float max ()
 
static float max32RoundedToMax16 ()
 
static float min ()
 
static float min32RoundedToMin16 ()
 
template<int bits>
static float reduceMantissaToNbits (const float &f)
 
static float reduceMantissaToNbits (const float &f, int bits)
 
template<int bits>
static float reduceMantissaToNbitsRounding (const float &f)
 
static float reduceMantissaToNbitsRounding (float f, int bits)
 
template<typename InItr , typename OutItr >
static void reduceMantissaToNbitsRounding (int bits, InItr begin, InItr end, OutItr out)
 

Static Private Member Functions

static void filltables ()
 

Static Private Attributes

static uint16_t basetable [512]
 
static uint32_t exponenttable [64]
 
static uint32_t mantissatable [2048]
 
static uint16_t offsettable [64]
 
static uint8_t shifttable [512]
 

Detailed Description

Definition at line 9 of file libminifloat.h.

Constructor & Destructor Documentation

MiniFloatConverter::MiniFloatConverter ( )

Definition at line 13 of file libminifloat.cc.

References filltables().

13  {
14  static bool once = false;
15  if (!once) { filltables(); once = true; }
16 }
static void filltables()
Definition: libminifloat.cc:18

Member Function Documentation

static float MiniFloatConverter::denorm_min ( )
inline static

Definition at line 136 of file libminifloat.h.

References conv.

136  {
137  union { float flt; uint32_t i32; } conv;
138  conv.i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0]
139  return conv.flt;
140  }
static HepMC::IO_HEPEVT conv
void MiniFloatConverter::filltables ( )
static private

Definition at line 18 of file libminifloat.cc.

References basetable, MillePedeFileConverter_cfg::e, exponenttable, mps_fire::i, createfilelist::int, funct::m, mantissatable, offsettable, and shifttable.

Referenced by MiniFloatConverter().

18  {
19  // ==== mantissatable ===
20  // -- zero --
21  mantissatable[0] = 0;
22  // -- denorm --
23  for (unsigned int i = 1; i <= 1023; ++i) {
24  unsigned int m =(i<<13), e=0;
25  while(!(m&0x00800000)){ // While not normalized
26  e-=0x00800000; // Decrement exponent (1<<23)
27  m<<=1; // Shift mantissa
28  }
29  m&=~0x00800000; // Clear leading 1 bit
30  e+= 0x38800000; // Adjust bias ((127-14)<<23)
31  mantissatable[i] = m | e;
32  }
33  // -- norm --
34  for (unsigned int i = 1024; i <= 2047; ++i) {
35  mantissatable[i] = 0x38000000 + ((i-1024)<<13);
36  }
37  // ==== exponenttable ===
38  exponenttable[0] = 0;
39  for (unsigned int i = 1; i <= 30; ++i) exponenttable[i] = i<<23;
40  exponenttable[31] = 0x47800000;
41  exponenttable[32] = 0x80000000u;
42  for (unsigned int i = 33; i <= 62; ++i) exponenttable[i] = 0x80000000u | ((i-32)<<23);
43  exponenttable[63] = 0xC7800000;
44 
45  // ==== offsettable ====
46  for (unsigned int i = 0; i <= 63; ++i) offsettable[i] = ((i == 0 || i == 32) ? 0 : 1024);
47 
48  // ==== basetable, shifttable ===
49  for (unsigned i=0; i<256; ++i){
50  int e = int(i)-127;
51  if(e<-24){ // Very small numbers map to zero
52  basetable[i|0x000]=0x0000;
53  basetable[i|0x100]=0x8000;
54  shifttable[i|0x000]=24;
55  shifttable[i|0x100]=24;
56  }
57  else if(e<-14){ // Small numbers map to denorms
58  basetable[i|0x000]=(0x0400>>(-e-14));
59  basetable[i|0x100]=(0x0400>>(-e-14)) | 0x8000;
60  shifttable[i|0x000]=-e-1;
61  shifttable[i|0x100]=-e-1;
62  }
63  else if(e<=15){ // Normal numbers just lose precision
64  basetable[i|0x000]=((e+15)<<10);
65  basetable[i|0x100]=((e+15)<<10) | 0x8000;
66  shifttable[i|0x000]=13;
67  shifttable[i|0x100]=13;
68  }
69  else if(e<128){ // Large numbers map to Infinity
70  basetable[i|0x000]=0x7C00;
71  basetable[i|0x100]=0xFC00;
72  shifttable[i|0x000]=24;
73  shifttable[i|0x100]=24;
74  }
75  else{ // Infinity and NaN's stay Infinity and NaN's
76  basetable[i|0x000]=0x7C00;
77  basetable[i|0x100]=0xFC00;
78  shifttable[i|0x000]=13;
79  shifttable[i|0x100]=13;
80  }
81  }
82 }
static uint16_t offsettable[64]
Definition: libminifloat.h:150
static uint16_t basetable[512]
Definition: libminifloat.h:151
static uint8_t shifttable[512]
Definition: libminifloat.h:152
static uint32_t mantissatable[2048]
Definition: libminifloat.h:148
static uint32_t exponenttable[64]
Definition: libminifloat.h:149
static float MiniFloatConverter::float16to32 ( uint16_t  h)
inline static

Definition at line 12 of file libminifloat.h.

References conv, exponenttable, mantissatable, and offsettable.

Referenced by convertPackedEtaToPackedY(), CompressionElement::unpack(), pat::MET::PackedMETUncertainty::unpack(), pat::PackedGenParticle::unpack(), pat::PackedCandidate::unpack(), BeamCurrentInfo::unpackData(), and pat::PackedCandidate::unpackVtx().

12  {
13  union { float flt; uint32_t i32; } conv;
14  conv.i32 = mantissatable[offsettable[h>>10]+(h&0x3ff)]+exponenttable[h>>10];
15  return conv.flt;
16  }
FWCore Framework interface EventSetupRecordImplementation h
Helper function to determine trigger accepts.
static uint16_t offsettable[64]
Definition: libminifloat.h:150
static HepMC::IO_HEPEVT conv
static uint32_t mantissatable[2048]
Definition: libminifloat.h:148
static uint32_t exponenttable[64]
Definition: libminifloat.h:149
static uint16_t MiniFloatConverter::float32to16 ( float  x)
inline static

Definition at line 17 of file libminifloat.h.

References float32to16round().

Referenced by CompressionElement::pack(), pat::MET::PackedMETUncertainty::pack(), pat::PackedGenParticle::pack(), pat::PackedCandidate::pack(), BeamCurrentInfo::packData(), and pat::PackedCandidate::packVtx().

17  {
18  return float32to16round(x);
19  }
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:27
static uint16_t MiniFloatConverter::float32to16crop ( float  x)
inline static

Fast implementation, but it truncates the extra mantissa bits, so results are biased toward zero.

Definition at line 21 of file libminifloat.h.

References basetable, conv, shifttable, and x.

21  {
22  union { float flt; uint32_t i32; } conv;
23  conv.flt = x;
24  return basetable[(conv.i32>>23)&0x1ff]+((conv.i32&0x007fffff)>>shifttable[(conv.i32>>23)&0x1ff]);
25  }
static HepMC::IO_HEPEVT conv
static uint16_t basetable[512]
Definition: libminifloat.h:151
static uint8_t shifttable[512]
Definition: libminifloat.h:152
static uint16_t MiniFloatConverter::float32to16round ( float  x)
inline static

Slower implementation, but it rounds to avoid biases.

Definition at line 27 of file libminifloat.h.

References runEdmFileComparison::base, compare_using_db::base2, basetable, conv, edm::shift, shifttable, and x.

Referenced by float32to16().

27  {
28  union { float flt; uint32_t i32; } conv;
29  conv.flt = x;
30  uint8_t shift = shifttable[(conv.i32>>23)&0x1ff];
31  if (shift == 13) {
32  uint16_t base2 = (conv.i32&0x007fffff)>>12;
33  uint16_t base = base2 >> 1;
34  if (((base2 & 1) != 0) && (base < 1023)) base++;
35  return basetable[(conv.i32>>23)&0x1ff]+base;
36  } else {
37  return basetable[(conv.i32>>23)&0x1ff]+((conv.i32&0x007fffff)>>shifttable[(conv.i32>>23)&0x1ff]);
38  }
39  }
static HepMC::IO_HEPEVT conv
static uint16_t basetable[512]
Definition: libminifloat.h:151
static uint8_t shifttable[512]
Definition: libminifloat.h:152
base
Make Sure CMSSW is Setup ##.
static unsigned int const shift
static bool MiniFloatConverter::isdenorm ( uint16_t  h)
inline static

Definition at line 142 of file libminifloat.h.

142  {
143  // if exponent is zero (sign-bit excluded of course) and mantissa is not zero
144  return ((h >> 10) & 0x1f) == 0 && (h & 0x3ff) != 0;
145  }
FWCore Framework interface EventSetupRecordImplementation h
Helper function to determine trigger accepts.
static float MiniFloatConverter::max ( )
inline static

Definition at line 106 of file libminifloat.h.

References conv.

106  {
107  union { float flt; uint32_t i32; } conv;
108  conv.i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e]
109  return conv.flt;
110  }
static HepMC::IO_HEPEVT conv
static float MiniFloatConverter::max32RoundedToMax16 ( )
inline static

Definition at line 113 of file libminifloat.h.

References conv.

113  {
114  union { float flt; uint32_t i32; } conv;
115  // 2^16 in float32 is the first to result inf in float16, so
116  // 2^16-1 is the last float32 to result max() in float16
117  conv.i32 = (0x8f<<23) - 1;
118  return conv.flt;
119  }
static HepMC::IO_HEPEVT conv
static float MiniFloatConverter::min ( )
inline static

Definition at line 121 of file libminifloat.h.

References conv.

121  {
122  union { float flt; uint32_t i32; } conv;
123  conv.i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1]
124  return conv.flt;
125  }
static HepMC::IO_HEPEVT conv
static float MiniFloatConverter::min32RoundedToMin16 ( )
inline static

Definition at line 128 of file libminifloat.h.

References conv.

128  {
129  union { float flt; uint32_t i32; } conv;
130  // 2^-14-1 in float32 is the first to result denormalized in float16, so
131  // 2^-14 is the first float32 to result min() in float16
132  conv.i32 = (0x71<<23);
133  return conv.flt;
134  }
static HepMC::IO_HEPEVT conv
template<int bits>
static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f)
inline static

Definition at line 41 of file libminifloat.h.

References bits, constexpr, conv, f, and RecoTauDiscriminantConfiguration::mask.

Referenced by CompressionElement::pack().

42  {
43  static_assert(bits <= 23,"max mantissa size is 23 bits");
44  constexpr uint32_t mask = (0xFFFFFFFF >> (23-bits)) << (23-bits);
45  union { float flt; uint32_t i32; } conv;
46  conv.flt=f;
47  conv.i32&=mask;
48  return conv.flt;
49  }
static HepMC::IO_HEPEVT conv
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
double f[11][100]
#define constexpr
static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f,
int  bits 
)
inline static

Definition at line 50 of file libminifloat.h.

References bits, conv, f, and RecoTauDiscriminantConfiguration::mask.

51  {
52  uint32_t mask = (0xFFFFFFFF >> (23-bits)) << (23-bits);
53  union { float flt; uint32_t i32; } conv;
54  conv.flt=f;
55  conv.i32&=mask;
56  return conv.flt;
57  }
static HepMC::IO_HEPEVT conv
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
double f[11][100]
template<int bits>
static float MiniFloatConverter::reduceMantissaToNbitsRounding ( const float &  f)
inline static

Definition at line 86 of file libminifloat.h.

References bits.

Referenced by nanoaod::flatTableHelper::MaybeMantissaReduce< float >::bulk(), and nanoaod::flatTableHelper::MaybeMantissaReduce< float >::one().

87  {
88  static const ReduceMantissaToNbitsRounding reducer(bits);
89  return reducer(f);
90  }
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
double f[11][100]
static float MiniFloatConverter::reduceMantissaToNbitsRounding ( float  f,
int  bits 
)
inline static

Definition at line 94 of file libminifloat.h.

References f, and MiniFloatConverter::ReduceMantissaToNbitsRounding::ReduceMantissaToNbitsRounding().

95  {
96  return ReduceMantissaToNbitsRounding(bits)(f);
97  }
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
double f[11][100]
template<typename InItr , typename OutItr >
static void MiniFloatConverter::reduceMantissaToNbitsRounding ( int  bits,
InItr  begin,
InItr  end,
OutItr  out 
)
inline static

Definition at line 100 of file libminifloat.h.

References MiniFloatConverter::ReduceMantissaToNbitsRounding::ReduceMantissaToNbitsRounding(), and create_public_lumi_plots::transform.

101  {
102  std::transform(begin, end, out, ReduceMantissaToNbitsRounding(bits));
103  }
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision bits
#define end
Definition: vmac.h:39
#define begin
Definition: vmac.h:32

Member Data Documentation

uint16_t MiniFloatConverter::basetable
static private

Definition at line 151 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().

uint32_t MiniFloatConverter::exponenttable
static private

Definition at line 149 of file libminifloat.h.

Referenced by filltables(), and float16to32().

uint32_t MiniFloatConverter::mantissatable
static private

Definition at line 148 of file libminifloat.h.

Referenced by filltables(), and float16to32().

uint16_t MiniFloatConverter::offsettable
static private

Definition at line 150 of file libminifloat.h.

Referenced by filltables(), and float16to32().

uint8_t MiniFloatConverter::shifttable
static private

Definition at line 152 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().