CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Static Private Member Functions | Static Private Attributes
MiniFloatConverter Class Reference

#include <libminifloat.h>

Classes

class  ReduceMantissaToNbitsRounding
 

Public Member Functions

 MiniFloatConverter ()
 

Static Public Member Functions

static float denorm_min ()
 
static float float16to32 (uint16_t h)
 
static uint16_t float32to16 (float x)
 
static uint16_t float32to16crop (float x)
 Fast implementation, but it truncates the extra mantissa bits, so results are biased toward zero. More...
 
static uint16_t float32to16round (float x)
 Slower implementation, but it rounds to avoid biases. More...
 
static bool isdenorm (uint16_t h)
 
static float max ()
 
static float max32RoundedToMax16 ()
 
static float min ()
 
static float min32RoundedToMin16 ()
 
template<int bits>
static float reduceMantissaToNbits (const float &f)
 
static float reduceMantissaToNbits (const float &f, int bits)
 
template<int bits>
static float reduceMantissaToNbitsRounding (const float &f)
 
static float reduceMantissaToNbitsRounding (float f, int bits)
 
template<typename InItr , typename OutItr >
static void reduceMantissaToNbitsRounding (int bits, InItr begin, InItr end, OutItr out)
 

Static Private Member Functions

static void filltables ()
 

Static Private Attributes

static uint16_t basetable [512]
 
static uint32_t exponenttable [64]
 
static uint32_t mantissatable [2048]
 
static uint16_t offsettable [64]
 
static uint8_t shifttable [512]
 

Detailed Description

Definition at line 10 of file libminifloat.h.

Constructor & Destructor Documentation

◆ MiniFloatConverter()

MiniFloatConverter::MiniFloatConverter ( )

Definition at line 12 of file libminifloat.cc.

References filltables().

12  {
13  static bool once = false;
14  if (!once) {
15  filltables();
16  once = true;
17  }
18 }
static void filltables()
Definition: libminifloat.cc:20

Member Function Documentation

◆ denorm_min()

static float MiniFloatConverter::denorm_min ( )
inlinestatic

Definition at line 119 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr().

119  {
120  constexpr uint32_t i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0]
121  return edm::bit_cast<float>(i32);
122  }

◆ filltables()

void MiniFloatConverter::filltables ( )
staticprivate

Definition at line 20 of file libminifloat.cc.

References basetable, MillePedeFileConverter_cfg::e, exponenttable, mps_fire::i, createfilelist::int, visualization-live-secondInstance_cfg::m, mantissatable, offsettable, and shifttable.

Referenced by MiniFloatConverter().

20  {
21  // ==== mantissatable ===
22  // -- zero --
23  mantissatable[0] = 0;
24  // -- denorm --
25  for (unsigned int i = 1; i <= 1023; ++i) {
26  unsigned int m = (i << 13), e = 0;
27  while (!(m & 0x00800000)) { // While not normalized
28  e -= 0x00800000; // Decrement exponent (1<<23)
29  m <<= 1; // Shift mantissa
30  }
31  m &= ~0x00800000; // Clear leading 1 bit
32  e += 0x38800000; // Adjust bias ((127-14)<<23)
33  mantissatable[i] = m | e;
34  }
35  // -- norm --
36  for (unsigned int i = 1024; i <= 2047; ++i) {
37  mantissatable[i] = 0x38000000 + ((i - 1024) << 13);
38  }
39  // ==== exponenttable ===
40  exponenttable[0] = 0;
41  for (unsigned int i = 1; i <= 30; ++i)
42  exponenttable[i] = i << 23;
43  exponenttable[31] = 0x47800000;
44  exponenttable[32] = 0x80000000u;
45  for (unsigned int i = 33; i <= 62; ++i)
46  exponenttable[i] = 0x80000000u | ((i - 32) << 23);
47  exponenttable[63] = 0xC7800000;
48 
49  // ==== offsettable ====
50  for (unsigned int i = 0; i <= 63; ++i)
51  offsettable[i] = ((i == 0 || i == 32) ? 0 : 1024);
52 
53  // ==== basetable, shifttable ===
54  for (unsigned i = 0; i < 256; ++i) {
55  int e = int(i) - 127;
56  if (e < -24) { // Very small numbers map to zero
57  basetable[i | 0x000] = 0x0000;
58  basetable[i | 0x100] = 0x8000;
59  shifttable[i | 0x000] = 24;
60  shifttable[i | 0x100] = 24;
61  } else if (e < -14) { // Small numbers map to denorms
62  basetable[i | 0x000] = (0x0400 >> (-e - 14));
63  basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
64  shifttable[i | 0x000] = -e - 1;
65  shifttable[i | 0x100] = -e - 1;
66  } else if (e <= 15) { // Normal numbers just lose precision
67  basetable[i | 0x000] = ((e + 15) << 10);
68  basetable[i | 0x100] = ((e + 15) << 10) | 0x8000;
69  shifttable[i | 0x000] = 13;
70  shifttable[i | 0x100] = 13;
71  } else if (e < 128) { // Large numbers map to Infinity
72  basetable[i | 0x000] = 0x7C00;
73  basetable[i | 0x100] = 0xFC00;
74  shifttable[i | 0x000] = 24;
75  shifttable[i | 0x100] = 24;
76  } else { // Infinity and NaN's stay Infinity and NaN's
77  basetable[i | 0x000] = 0x7C00;
78  basetable[i | 0x100] = 0xFC00;
79  shifttable[i | 0x000] = 13;
80  shifttable[i | 0x100] = 13;
81  }
82  }
83 }
static uint16_t offsettable[64]
Definition: libminifloat.h:132
static uint16_t basetable[512]
Definition: libminifloat.h:133
static uint8_t shifttable[512]
Definition: libminifloat.h:134
static uint32_t mantissatable[2048]
Definition: libminifloat.h:130
static uint32_t exponenttable[64]
Definition: libminifloat.h:131

◆ float16to32()

static float MiniFloatConverter::float16to32 ( uint16_t  h)
inlinestatic

Definition at line 13 of file libminifloat.h.

References exponenttable, h, mantissatable, and offsettable.

Referenced by convertPackedEtaToPackedY(), CompressionElement::unpack(), pat::MET::PackedMETUncertainty::unpack(), pat::PackedGenParticle::unpack(), pat::PackedCandidate::unpack(), BeamCurrentInfo::unpackData(), pat::MET::PackedMETUncertainty::unpackDpx(), pat::MET::PackedMETUncertainty::unpackDpy(), pat::MET::PackedMETUncertainty::unpackDSumEt(), and pat::PackedCandidate::unpackVtx().

13  {
14  uint32_t i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10];
15  return edm::bit_cast<float>(i32);
16  }
static uint16_t offsettable[64]
Definition: libminifloat.h:132
static uint32_t mantissatable[2048]
Definition: libminifloat.h:130
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
static uint32_t exponenttable[64]
Definition: libminifloat.h:131

◆ float32to16()

static uint16_t MiniFloatConverter::float32to16 ( float  x)
inlinestatic

Definition at line 17 of file libminifloat.h.

References float32to16round(), and x.

Referenced by CompressionElement::pack(), pat::MET::PackedMETUncertainty::pack(), pat::PackedGenParticle::pack(), pat::PackedCandidate::pack(), BeamCurrentInfo::packData(), and pat::PackedCandidate::packVtx().

17 { return float32to16round(x); }
static uint16_t float32to16round(float x)
Slower implementation, but it rounds to avoid biases.
Definition: libminifloat.h:24

◆ float32to16crop()

static uint16_t MiniFloatConverter::float32to16crop ( float  x)
inlinestatic

Fast implementation, but it truncates the extra mantissa bits, so results are biased toward zero.

Definition at line 19 of file libminifloat.h.

References basetable, shifttable, and x.

19  {
20  uint32_t i32 = edm::bit_cast<uint32_t>(x);
21  return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
22  }
static uint16_t basetable[512]
Definition: libminifloat.h:133
static uint8_t shifttable[512]
Definition: libminifloat.h:134

◆ float32to16round()

static uint16_t MiniFloatConverter::float32to16round ( float  x)
inlinestatic

Slower implementation, but it rounds to avoid biases.

Definition at line 24 of file libminifloat.h.

References edmMakeDummyCfis::base, compare_using_db::base2, basetable, edm::shift, shifttable, and x.

Referenced by float32to16().

24  {
25  uint32_t i32 = edm::bit_cast<uint32_t>(x);
26  uint8_t shift = shifttable[(i32 >> 23) & 0x1ff];
27  if (shift == 13) {
28  uint16_t base2 = (i32 & 0x007fffff) >> 12;
29  uint16_t base = base2 >> 1;
30  if (((base2 & 1) != 0) && (base < 1023))
31  base++;
32  return basetable[(i32 >> 23) & 0x1ff] + base;
33  } else {
34  return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
35  }
36  }
static uint16_t basetable[512]
Definition: libminifloat.h:133
static uint8_t shifttable[512]
Definition: libminifloat.h:134
static unsigned int const shift

◆ isdenorm()

static bool MiniFloatConverter::isdenorm ( uint16_t  h)
inlinestatic

Definition at line 124 of file libminifloat.h.

References h.

124  {
125  // if exponent is zero (sign-bit excluded of course) and mantissa is not zero
126  return ((h >> 10) & 0x1f) == 0 && (h & 0x3ff) != 0;
127  }
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4

◆ max()

static float MiniFloatConverter::max ( )
inlinestatic

Definition at line 93 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr().

Referenced by pat::PackedCandidate::pack().

93  {
94  constexpr uint32_t i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e]
95  return edm::bit_cast<float>(i32);
96  }

◆ max32RoundedToMax16()

static float MiniFloatConverter::max32RoundedToMax16 ( )
inlinestatic

Definition at line 99 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr().

99  {
100  // 2^16 in float32 is the first value that results in inf in float16, so
101  // the largest float32 below 2^16 is the last one that results in max() in float16
102  constexpr uint32_t i32 = (0x8f << 23) - 1;
103  return edm::bit_cast<float>(i32);
104  }

◆ min()

static float MiniFloatConverter::min ( )
inlinestatic

Definition at line 106 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr().

106  {
107  constexpr uint32_t i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1]
108  return edm::bit_cast<float>(i32);
109  }

◆ min32RoundedToMin16()

static float MiniFloatConverter::min32RoundedToMin16 ( )
inlinestatic

Definition at line 112 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr().

112  {
113  // the largest float32 below 2^-14 already results in a denormalized float16, so
114  // 2^-14 is the smallest positive float32 that results in min() in float16
115  constexpr uint32_t i32 = (0x71 << 23);
116  return edm::bit_cast<float>(i32);
117  }

◆ reduceMantissaToNbits() [1/2]

template<int bits>
static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f)
inlinestatic

Definition at line 38 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::pixelStatus::bits, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), f, and ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::pixelStatus::mask.

Referenced by CompressionElement::pack().

38  {
39  static_assert(bits <= 23, "max mantissa size is 23 bits");
40  constexpr uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
41  uint32_t i32 = edm::bit_cast<uint32_t>(f);
42  i32 &= mask;
43  return edm::bit_cast<float>(i32);
44  }
double f[11][100]

◆ reduceMantissaToNbits() [2/2]

static float MiniFloatConverter::reduceMantissaToNbits ( const float &  f,
int  bits 
)
inlinestatic

Definition at line 45 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::pixelStatus::bits, f, and ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::pixelStatus::mask.

45  {
46  uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
47  uint32_t i32 = edm::bit_cast<uint32_t>(f);
48  i32 &= mask;
49  return edm::bit_cast<float>(i32);
50  }
double f[11][100]

◆ reduceMantissaToNbitsRounding() [1/3]

template<int bits>
static float MiniFloatConverter::reduceMantissaToNbitsRounding ( const float &  f)
inlinestatic

◆ reduceMantissaToNbitsRounding() [2/3]

static float MiniFloatConverter::reduceMantissaToNbitsRounding ( float  f,
int  bits 
)
inlinestatic

Definition at line 84 of file libminifloat.h.

References ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::pixelStatus::bits, and f.

84  {
85  return ReduceMantissaToNbitsRounding(bits)(f);
86  }
double f[11][100]

◆ reduceMantissaToNbitsRounding() [3/3]

template<typename InItr , typename OutItr >
static void MiniFloatConverter::reduceMantissaToNbitsRounding ( int  bits,
InItr  begin,
InItr  end,
OutItr  out 
)
inlinestatic

Member Data Documentation

◆ basetable

uint16_t MiniFloatConverter::basetable
staticprivate

Definition at line 133 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().

◆ exponenttable

uint32_t MiniFloatConverter::exponenttable
staticprivate

Definition at line 131 of file libminifloat.h.

Referenced by filltables(), and float16to32().

◆ mantissatable

uint32_t MiniFloatConverter::mantissatable
staticprivate

Definition at line 130 of file libminifloat.h.

Referenced by filltables(), and float16to32().

◆ offsettable

uint16_t MiniFloatConverter::offsettable
staticprivate

Definition at line 132 of file libminifloat.h.

Referenced by filltables(), and float16to32().

◆ shifttable

uint8_t MiniFloatConverter::shifttable
staticprivate

Definition at line 134 of file libminifloat.h.

Referenced by filltables(), float32to16crop(), and float32to16round().