CMS 3D CMS Logo

OneToManyAssoc.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_AlpakaInterface_interface_OneToManyAssoc_h
2 #define HeterogeneousCore_AlpakaInterface_interface_OneToManyAssoc_h
3 
4 #include <algorithm>
5 #include <cstddef>
6 #include <cstdint>
7 #include <type_traits>
8 
9 #include <alpaka/alpaka.hpp>
10 
15 
16 namespace cms::alpakatools {
17 
18  template <typename I, // type stored in the container (usually an index in a vector of the input values)
19  int32_t ONES, // number of "Ones" +1. If -1 is initialized at runtime using external storage
20  int32_t SIZE // max number of element. If -1 is initialized at runtime using external storage
21  >
23  public:
24  using Counter = uint32_t;
25 
27 
28  using index_type = I;
29 
30  struct View {
32  Counter *offStorage = nullptr;
34  int32_t offSize = -1;
35  int32_t contentSize = -1;
36  };
37 
38  static constexpr int32_t ctNOnes() { return ONES; }
39  constexpr auto totOnes() const { return off.capacity(); }
40  constexpr auto nOnes() const { return totOnes() - 1; }
41  static constexpr int32_t ctCapacity() { return SIZE; }
42  constexpr auto capacity() const { return content.capacity(); }
43 
44  ALPAKA_FN_HOST_ACC void initStorage(View view) {
45  ALPAKA_ASSERT_ACC(view.assoc == this);
46  if constexpr (ctCapacity() < 0) {
47  ALPAKA_ASSERT_ACC(view.contentStorage);
48  ALPAKA_ASSERT_ACC(view.contentSize > 0);
49  content.init(view.contentStorage, view.contentSize);
50  }
51  if constexpr (ctNOnes() < 0) {
52  ALPAKA_ASSERT_ACC(view.offStorage);
53  ALPAKA_ASSERT_ACC(view.offSize > 0);
54  off.init(view.offStorage, view.offSize);
55  }
56  }
57 
58  ALPAKA_FN_HOST_ACC void zero() {
59  for (int32_t i = 0; i < totOnes(); ++i) {
60  off[i] = 0;
61  }
62  }
63 
64  template <typename TAcc>
65  ALPAKA_FN_ACC ALPAKA_FN_INLINE void add(const TAcc &acc, CountersOnly const &co) {
66  for (uint32_t i = 0; static_cast<int>(i) < totOnes(); ++i) {
68  }
69  }
70 
71  template <typename TAcc>
72  ALPAKA_FN_ACC ALPAKA_FN_INLINE static uint32_t atomicIncrement(const TAcc &acc, Counter &x) {
73  return alpaka::atomicAdd(acc, &x, 1u, alpaka::hierarchy::Blocks{});
74  }
75 
76  template <typename TAcc>
77  ALPAKA_FN_ACC ALPAKA_FN_INLINE static uint32_t atomicDecrement(const TAcc &acc, Counter &x) {
78  return alpaka::atomicSub(acc, &x, 1u, alpaka::hierarchy::Blocks{});
79  }
80 
81  template <typename TAcc>
82  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) {
83  ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
84  atomicIncrement(acc, off[b]);
85  }
86 
87  template <typename TAcc>
88  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) {
89  ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
90  auto w = atomicDecrement(acc, off[b]);
91  ALPAKA_ASSERT_ACC(w > 0);
92  content[w - 1] = j;
93  }
94 
95  // this MUST BE DONE in a single block (or in two kernels!)
96  struct zeroAndInit {
97  template <typename TAcc>
98  ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const {
99  ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
100  ALPAKA_ASSERT_ACC((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
101  auto h = view.assoc;
103  h->psws = 0;
104  h->initStorage(view);
105  }
106  alpaka::syncBlockThreads(acc);
107  for (int i : cms::alpakatools::independent_group_elements(acc, h->totOnes())) {
108  h->off[i] = 0;
109  }
110  }
111  };
112 
113  template <typename TAcc, typename TQueue>
114  ALPAKA_FN_INLINE static void launchZero(OneToManyAssocBase *h, TQueue &queue) {
115  View view = {h, nullptr, nullptr, -1, -1};
116  launchZero<TAcc>(view, queue);
117  }
118 
119  template <typename TAcc, typename TQueue>
120  ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) {
121  if constexpr (ctCapacity() < 0) {
122  ALPAKA_ASSERT_ACC(view.contentStorage);
123  ALPAKA_ASSERT_ACC(view.contentSize > 0);
124  }
125  if constexpr (ctNOnes() < 0) {
126  ALPAKA_ASSERT_ACC(view.offStorage);
127  ALPAKA_ASSERT_ACC(view.offSize > 0);
128  }
129  if constexpr (!requires_single_thread_per_block_v<TAcc>) {
130  auto nthreads = 1024;
131  auto nblocks = 1; // MUST BE ONE as memory is initialize in thread 0 (alternative is two kernels);
132  auto workDiv = cms::alpakatools::make_workdiv<TAcc>(nblocks, nthreads);
133  alpaka::exec<TAcc>(queue, workDiv, zeroAndInit{}, view);
134  } else {
135  auto h = view.assoc;
137  h->initStorage(view);
138  h->zero();
139  h->psws = 0;
140  }
141  }
142 
143  constexpr auto size() const { return uint32_t(off[totOnes() - 1]); }
144  constexpr auto size(uint32_t b) const { return off[b + 1] - off[b]; }
145 
146  constexpr index_type const *begin() const { return content.data(); }
147  constexpr index_type const *end() const { return begin() + size(); }
148 
149  constexpr index_type const *begin(uint32_t b) const { return content.data() + off[b]; }
150  constexpr index_type const *end(uint32_t b) const { return content.data() + off[b + 1]; }
151 
154  int32_t psws; // prefix-scan working space
155  };
156 
157  template <typename I, // type stored in the container (usually an index in a vector of the input values)
158  int32_t ONES, // number of "Ones" +1. If -1 is initialized at runtime using external storage
159  int32_t SIZE // max number of element. If -1 is initialized at runtime using external storage
160  >
161  class OneToManyAssocSequential : public OneToManyAssocBase<I, ONES, SIZE> {
162  public:
164 
165  template <typename TAcc>
166  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int32_t
167  bulkFill(const TAcc &acc, AtomicPairCounter &apc, index_type const *v, uint32_t n) {
168  auto c = apc.inc_add(acc, n);
169  if (int(c.first) >= this->nOnes())
170  return -int32_t(c.first);
171  this->off[c.first] = c.second;
172  for (uint32_t j = 0; j < n; ++j)
173  this->content[c.second + j] = v[j];
174  return c.first;
175  }
176 
177  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void bulkFinalize(AtomicPairCounter const &apc) {
178  this->off[apc.get().first] = apc.get().second;
179  }
180 
181  template <typename TAcc>
182  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void bulkFinalizeFill(TAcc &acc, AtomicPairCounter const &apc) {
183  int f = apc.get().first;
184  auto s = apc.get().second;
185  if (f >= this->nOnes()) { // overflow!
186  this->off[this->nOnes()] = uint32_t(this->off[this->nOnes() - 1]);
187  return;
188  }
189  auto first = f + alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
190  for (int i = first; i < this->totOnes(); i += alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0]) {
191  this->off[i] = s;
192  }
193  }
194 
195  struct finalizeBulk {
196  template <typename TAcc>
197  ALPAKA_FN_ACC void operator()(const TAcc &acc,
198  AtomicPairCounter const *apc,
199  OneToManyAssocSequential *__restrict__ assoc) const {
200  assoc->bulkFinalizeFill(acc, *apc);
201  }
202  };
203  };
204 
205  template <typename I, // type stored in the container (usually an index in a vector of the input values)
206  int32_t ONES, // number of "Ones" +1. If -1 is initialized at runtime using external storage
207  int32_t SIZE // max number of element. If -1 is initialized at runtime using external storage
208  >
209  class OneToManyAssocRandomAccess : public OneToManyAssocBase<I, ONES, SIZE> {
210  public:
213 
214  template <typename TAcc>
215  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) {
216  ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0);
217  blockPrefixScan(acc, this->off.data(), this->totOnes(), ws);
218  ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
219  }
220 
221  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() {
222  // Single thread finalize.
223  for (uint32_t i = 1; static_cast<int>(i) < this->totOnes(); ++i)
224  this->off[i] += this->off[i - 1];
225  }
226 
227  template <typename TAcc, typename TQueue>
228  ALPAKA_FN_INLINE static void launchFinalize(OneToManyAssocRandomAccess *h, TQueue &queue) {
229  View view = {h, nullptr, nullptr, -1, -1};
230  launchFinalize<TAcc>(view, queue);
231  }
232 
233  template <typename TAcc, typename TQueue>
234  ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) {
235  // View stores a base pointer, we need to upcast back...
236  auto h = static_cast<OneToManyAssocRandomAccess *>(view.assoc);
238  if constexpr (!requires_single_thread_per_block_v<TAcc>) {
239  Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off));
242  ALPAKA_ASSERT_ACC(view.offStorage);
243  ALPAKA_ASSERT_ACC(view.offSize > 0);
244  nOnes = view.offSize;
245  poff = view.offStorage;
246  }
248  int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws));
249  auto nthreads = 1024;
250  auto nblocks = (nOnes + nthreads - 1) / nthreads;
251  auto workDiv = cms::alpakatools::make_workdiv<TAcc>(nblocks, nthreads);
252  alpaka::exec<TAcc>(queue,
253  workDiv,
255  poff,
256  poff,
257  nOnes,
258  nblocks,
259  ppsws,
260  alpaka::getWarpSizes(alpaka::getDev(queue))[0]);
261  } else {
262  h->finalize();
263  }
264  }
265  };
266 
267 } // namespace cms::alpakatools
268 
269 #endif // HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int32_t bulkFill(const TAcc &acc, AtomicPairCounter &apc, index_type const *v, uint32_t n)
cudaStream_t int32_t ONES
constexpr index_type const * begin() const
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
static ALPAKA_FN_INLINE void launchZero(View view, TQueue &queue)
constexpr auto size(uint32_t b) const
T w() const
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
FlexiStorage< Counter, ONES > off
static ALPAKA_FN_INLINE void launchZero(OneToManyAssocBase *h, TQueue &queue)
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
__host__ __device__ VT * co
Definition: prefixScan.h:47
ALPAKA_FN_HOST_ACC void initStorage(View view)
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws=nullptr)
static constexpr int32_t ctNOnes()
constexpr index_type const * end() const
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize()
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b)
ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const
__device__ __host__ Counters get() const
std::vector< Block > Blocks
Definition: Block.h:99
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void bulkFinalizeFill(TAcc &acc, AtomicPairCounter const &apc)
const std::complex< double > I
Definition: I.h:8
double f[11][100]
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int nthreads
ALPAKA_FN_ACC constexpr Counters get() const
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void bulkFinalize(AtomicPairCounter const &apc)
static constexpr int32_t ctCapacity()
ALPAKA_FN_ACC void operator()(const TAcc &acc, AtomicPairCounter const *apc, OneToManyAssocSequential *__restrict__ assoc) const
constexpr index_type const * end(uint32_t b) const
typename OneToManyAssocBase< I, ONES, SIZE >::View View
constexpr index_type const * begin(uint32_t b) const
double b
Definition: hdecay.h:120
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j)
ALPAKA_FN_HOST_ACC void zero()
static ALPAKA_FN_INLINE void launchFinalize(View view, TQueue &queue)
constexpr int capacity() const
Definition: FlexiStorage.h:12
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
ALPAKA_FN_ACC ALPAKA_FN_INLINE void add(const TAcc &acc, CountersOnly const &co)
FlexiStorage< index_type, SIZE > content
TupleMultiplicity< TrackerTraits > const HitToTuple< TrackerTraits > const cms::cuda::AtomicPairCounter * apc
float x
ALPAKA_FN_ACC static ALPAKA_FN_INLINE uint32_t atomicIncrement(const TAcc &acc, Counter &x)
static ALPAKA_FN_INLINE void launchFinalize(OneToManyAssocRandomAccess *h, TQueue &queue)
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_ACC static ALPAKA_FN_INLINE uint32_t atomicDecrement(const TAcc &acc, Counter &x)