CMS 3D CMS Logo

HistoContainer.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
2 #define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
3 
4 #include <algorithm>
5 #include <cstddef>
6 #include <cstdint>
7 #include <type_traits>
8 
9 #include <alpaka/alpaka.hpp>
10 
17 
18 namespace cms::alpakatools {
19 
20  struct countFromVector {
21  template <typename TAcc, typename Histo, typename T>
22  ALPAKA_FN_ACC void operator()(const TAcc &acc,
23  Histo *__restrict__ h,
24  uint32_t nh,
25  T const *__restrict__ v,
26  uint32_t const *__restrict__ offsets) const {
27  const uint32_t nt = offsets[nh];
28  for (uint32_t i : uniform_elements(acc, nt)) {
29  auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
30  ALPAKA_ASSERT_ACC((*off) > 0);
31  int32_t ih = off - offsets - 1;
32  ALPAKA_ASSERT_ACC(ih >= 0);
33  ALPAKA_ASSERT_ACC(ih < int(nh));
34  h->count(acc, v[i], ih);
35  }
36  }
37  };
38 
39  struct fillFromVector {
40  template <typename TAcc, typename Histo, typename T>
41  ALPAKA_FN_ACC void operator()(const TAcc &acc,
42  Histo *__restrict__ h,
43  uint32_t nh,
44  T const *__restrict__ v,
45  uint32_t const *__restrict__ offsets) const {
46  const uint32_t nt = offsets[nh];
47  for (uint32_t i : uniform_elements(acc, nt)) {
48  auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
49  ALPAKA_ASSERT_ACC((*off) > 0);
50  int32_t ih = off - offsets - 1;
51  ALPAKA_ASSERT_ACC(ih >= 0);
52  ALPAKA_ASSERT_ACC(ih < int(nh));
53  h->fill(acc, v[i], i, ih);
54  }
55  }
56  };
57 
58  template <typename TAcc, typename Histo, typename T, typename TQueue>
59  ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
60  uint32_t nh,
61  T const *__restrict__ v,
62  uint32_t const *__restrict__ offsets,
63  uint32_t totSize,
64  uint32_t nthreads,
65  TQueue &queue) {
66  Histo::template launchZero<TAcc>(h, queue);
67 
68  const auto threadsPerBlockOrElementsPerThread = nthreads;
69  const auto blocksPerGrid = divide_up_by(totSize, nthreads);
70  const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
71 
72  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
73  Histo::template launchFinalize<TAcc>(h, queue);
74 
75  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
76  }
77 
78  template <typename TAcc, typename Histo, typename T, typename TQueue>
79  ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
80  typename Histo::View hv,
81  uint32_t nh,
82  T const *__restrict__ v,
83  uint32_t const *__restrict__ offsets,
84  uint32_t totSize,
85  uint32_t nthreads,
86  TQueue &queue) {
87  Histo::template launchZero<TAcc>(hv, queue);
88 
89  const auto threadsPerBlockOrElementsPerThread = nthreads;
90  const auto blocksPerGrid = divide_up_by(totSize, nthreads);
91  const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
92 
93  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
94  Histo::template launchFinalize<TAcc>(h, queue);
95 
96  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
97  }
98 
99  // iteratate over N bins left and right of the one containing "v"
100  template <typename Hist, typename V, typename Func>
101  ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) {
102  int bs = Hist::bin(value);
103  int be = std::min(int(Hist::nbins() - 1), bs + n);
104  bs = std::max(0, bs - n);
105  ALPAKA_ASSERT_ACC(be >= bs);
106  for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
107  func(*pj);
108  }
109  }
110 
111  // iteratate over bins containing all values in window wmin, wmax
112  template <typename Hist, typename V, typename Func>
113  ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
114  auto bs = Hist::bin(wmin);
115  auto be = Hist::bin(wmax);
116  ALPAKA_ASSERT_ACC(be >= bs);
117  for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
118  func(*pj);
119  }
120  }
121 
122  template <typename T, // the type of the discretized input values
123  uint32_t NBINS, // number of bins
124  int32_t SIZE, // max number of element. If -1 is initialized at runtime using external storage
125  uint32_t S = sizeof(T) * 8, // number of significant bits in T
126  typename I = uint32_t, // type stored in the container (usually an index in a vector of the input values)
127  uint32_t NHISTS = 1 // number of histos stored
128  >
130  public:
132  using View = typename Base::View;
133  using Counter = typename Base::Counter;
134  using index_type = typename Base::index_type;
135  using UT = typename std::make_unsigned<T>::type;
136 
// Integer base-2 logarithm: returns floor(log2(v)), i.e. the index of the
// highest set bit of `v`. Returns 0 for v == 0 as well as for v == 1.
//
// Works by binary search on the bit position: at each step, if any bit in the
// upper part selected by b[i] is set, the value is shifted down by s[i] and
// s[i] is added to the result.
static constexpr uint32_t ilog2(uint32_t v) {
  constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
  constexpr uint32_t s[] = {1, 2, 4, 8, 16};

  uint32_t r = 0;  // result of log2(v) will go here
  for (int i = 4; i >= 0; --i) {
    if (v & b[i]) {
      v >>= s[i];
      r |= s[i];
    }
  }
  return r;
}
149 
150  static constexpr uint32_t sizeT() { return S; }
151  static constexpr int32_t nhists() { return NHISTS; }
152  static constexpr uint32_t nbins() { return NBINS; }
153  static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; }
154  static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; }
155 
156  static constexpr auto histOff(uint32_t nh) { return NBINS * nh; }
157 
158  static constexpr UT bin(T t) {
159  constexpr uint32_t shift = sizeT() - nbits();
160  constexpr uint32_t mask = (1 << nbits()) - 1;
161  return (t >> shift) & mask;
162  }
163 
164  template <typename TAcc>
165  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
166  uint32_t b = bin(t);
168  Base::atomicIncrement(acc, this->off[b]);
169  }
170 
171  template <typename TAcc>
172  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
173  uint32_t b = bin(t);
175  auto w = Base::atomicDecrement(acc, this->off[b]);
176  ALPAKA_ASSERT_ACC(w > 0);
177  this->content[w - 1] = j;
178  }
179 
180  template <typename TAcc>
181  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
182  uint32_t b = bin(t);
184  b += histOff(nh);
185  ALPAKA_ASSERT_ACC(b < totbins());
186  Base::atomicIncrement(acc, this->off[b]);
187  }
188 
189  template <typename TAcc>
190  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
191  uint32_t b = bin(t);
193  b += histOff(nh);
194  ALPAKA_ASSERT_ACC(b < totbins());
195  auto w = Base::atomicDecrement(acc, this->off[b]);
196  ALPAKA_ASSERT_ACC(w > 0);
197  this->content[w - 1] = j;
198  }
199  };
200 } // namespace cms::alpakatools
201 #endif // HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
cms::alpakatools::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
ALPAKA_FN_HOST_ACC constexpr RandomIt upper_bound(RandomIt first, RandomIt last, const T &value, Compare comp={})
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
typename std::make_unsigned< T >::type UT
T w() const
typename Base::index_type index_type
static constexpr uint32_t nbins()
static constexpr uint32_t sizeT()
ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets, uint32_t totSize, uint32_t nthreads, TQueue &queue)
uint32_t T const *__restrict__ uint32_t const *__restrict__ offsets
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j)
std::function< unsigned int(align::ID)> Counter
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t V
static constexpr UT bin(T t)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int Func func
int ilog2(double factor)
Definition: Util.h:121
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func)
uint32_t T const *__restrict__ v
ALPAKA_FN_ACC void operator()(const TAcc &acc, Histo *__restrict__ h, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets) const
static constexpr uint32_t totbins()
ALPAKA_FN_ACC void operator()(const TAcc &acc, Histo *__restrict__ h, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets) const
static constexpr int32_t nhists()
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func)
const std::complex< double > I
Definition: I.h:8
def template(fileName, svg, replaceme="REPLACEME")
Definition: svgfig.py:521
typename Base::Counter Counter
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh)
Definition: value.py:1
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int nthreads
uint32_t nh
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t)
int nt
Definition: AMPTWrapper.h:42
static constexpr auto histOff(uint32_t nh)
static constexpr uint32_t nbits()
double b
Definition: hdecay.h:120
static constexpr uint32_t ilog2(uint32_t v)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int n
static unsigned int const shift
__host__ __device__ V wmin
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
long double T
const int NBINS
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t totSize
__host__ __device__ V V wmax