#ifndef HeterogeneousCore_CUDAUtilities_interface_OneToManyAssoc_h
#define HeterogeneousCore_CUDAUtilities_interface_OneToManyAssoc_h

#include <algorithm>
#ifndef __CUDA_ARCH__
#include <atomic>
#endif  // __CUDA_ARCH__
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include "HeterogeneousCore/CUDAUtilities/interface/AtomicPairCounter.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/FlexiStorage.h"
#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h"

// Non-owning view used to wire external storage into an association whose
// sizes are only known at runtime (template parameters set to -1).
template <typename Assoc>
struct OneToManyAssocView {
  using Counter = typename Assoc::Counter;
  using index_type = typename Assoc::index_type;

  Assoc *assoc = nullptr;
  Counter *offStorage = nullptr;
  index_type *contentStorage = nullptr;
  int32_t offSize = -1;
  int32_t contentSize = -1;
};
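// Illustrative note (not from the original header): ONES/SIZE >= 0 keep all
// storage inside the object, while -1 switches to external storage that must
// be wired in through the view. The type and buffer names below are
// assumptions for the sketch.
//
//   using FixedAssoc    = OneToManyAssoc<uint16_t, 1025, 8192>;  // storage inline
//   using FlexibleAssoc = OneToManyAssoc<uint16_t, -1, -1>;      // storage given at runtime
//   FlexibleAssoc::View view{&assoc, offBuffer, contentBuffer, offSize, contentSize};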
// Zero the counters and hook up the external storage (if any).
// MUST be launched as a single block: thread 0 performs the storage init.
template <typename Assoc>
__global__ void zeroAndInit(OneToManyAssocView<Assoc> view) {
  auto h = view.assoc;
  assert(1 == gridDim.x);
  assert(0 == blockIdx.x);

  if (0 == threadIdx.x) {
    h->psws = 0;
    h->initStorage(view);
  }
  __syncthreads();
  for (int i = threadIdx.x, nt = h->totOnes(); i < nt; i += blockDim.x) {
    h->off[i] = 0;
  }
}
// Convenience overload for associations whose storage is fully compile-time.
template <typename Assoc>
inline __attribute__((always_inline)) void launchZero(Assoc *h, cudaStream_t stream) {
  typename Assoc::View view = {h, nullptr, nullptr, -1, -1};
  launchZero(view, stream);
}
template <typename Assoc>
inline __attribute__((always_inline)) void launchZero(OneToManyAssocView<Assoc> view, cudaStream_t stream) {
  // a negative compile-time capacity means the storage is external:
  // the view must then carry valid pointers and sizes
  if constexpr (Assoc::ctCapacity() < 0) {
    assert(view.contentStorage);
    assert(view.contentSize > 0);
  }
  if constexpr (Assoc::ctNOnes() < 0) {
    assert(view.offStorage);
    assert(view.offSize > 0);
  }
  auto nthreads = 1024;
  auto nblocks = 1;  // MUST be one: memory is initialized by thread 0
  zeroAndInit<<<nblocks, nthreads, 0, stream>>>(view);
  cudaCheck(cudaGetLastError());
}
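// Host-side sketch (an illustration, not code from this header): zeroing an
// association whose buffers were allocated with cudaMalloc. All variable
// names are hypothetical.
//
//   FlexibleAssoc *d_assoc;                // device-resident object
//   FlexibleAssoc::Counter *d_off;         // offSize counters
//   FlexibleAssoc::index_type *d_content;  // contentSize indices
//   FlexibleAssoc::View v{d_assoc, d_off, d_content, offSize, contentSize};
//   launchZero(v, stream);                 // runs the single-block zeroAndInit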
// Convenience overload for associations whose storage is fully compile-time.
template <typename Assoc>
inline __attribute__((always_inline)) void launchFinalize(Assoc *h, cudaStream_t stream) {
  typename Assoc::View view = {h, nullptr, nullptr, -1, -1};
  launchFinalize(view, stream);
}
template <typename Assoc>
inline __attribute__((always_inline)) void launchFinalize(OneToManyAssocView<Assoc> view, cudaStream_t stream) {
  auto h = view.assoc;
  assert(h);
  using Counter = typename Assoc::Counter;
  Counter *poff = (Counter *)((char *)(h) + offsetof(Assoc, off));
  auto nOnes = Assoc::ctNOnes();
  if constexpr (Assoc::ctNOnes() < 0) {
    assert(view.offStorage);
    assert(view.offSize > 0);
    nOnes = view.offSize;
    poff = view.offStorage;
  }
  assert(nOnes > 0);
  int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(Assoc, psws));
  auto nthreads = 1024;
  auto nblocks = (nOnes + nthreads - 1) / nthreads;
  // turn the per-"one" counts accumulated by count() into cumulative offsets
  multiBlockPrefixScan<<<nblocks, nthreads, sizeof(int32_t) * nblocks, stream>>>(poff, poff, nOnes, ppsws);
  cudaCheck(cudaGetLastError());
}
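// The canonical two-pass filling protocol (sketch; countKernel/fillKernel are
// hypothetical user kernels, not part of this header):
//
//   launchZero(v, stream);                                  // 1. clear the counters
//   countKernel<<<blocks, threads, 0, stream>>>(d_assoc);   // 2. count(b) once per element
//   launchFinalize(v, stream);                              // 3. prefix scan: counts -> offsets
//   fillKernel<<<blocks, threads, 0, stream>>>(d_assoc);    // 4. fill(b, j) once per element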
template <typename Assoc>
__global__ void finalizeBulk(AtomicPairCounter const *apc, Assoc *__restrict__ assoc) {
  assoc->bulkFinalizeFill(*apc);
}
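// Bulk alternative (sketch): a thread that produces a whole group at once can
// append it with bulkFill and close the offsets with finalizeBulk. Everything
// here except bulkFill, finalizeBulk and AtomicPairCounter is hypothetical.
//
//   __global__ void producer(AtomicPairCounter *apc, Assoc *assoc) {
//     typename Assoc::index_type local[16];
//     uint32_t n = 0;
//     ...                                // collect up to 16 indices in local
//     assoc->bulkFill(*apc, local, n);   // a negative return value signals overflow
//   }
//   producer<<<blocks, threads, 0, stream>>>(d_apc, d_assoc);
//   finalizeBulk<<<blocks2, threads2, 0, stream>>>(d_apc, d_assoc);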
template <typename I,    // type stored in the container (usually an index into a vector of the input values)
          int32_t ONES,  // number of "ones" + 1; if -1, the offsets use external storage set at runtime
          int32_t SIZE>  // maximum number of elements; if -1, the content uses external storage set at runtime
class OneToManyAssoc {
public:
  using View = OneToManyAssocView<OneToManyAssoc<I, ONES, SIZE>>;
  using Counter = uint32_t;
  using CountersOnly = OneToManyAssoc<I, ONES, 0>;
  using index_type = I;

  static constexpr uint32_t ilog2(uint32_t v) {
    constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
    constexpr uint32_t s[] = {1, 2, 4, 8, 16};

    uint32_t r = 0;  // the result of log2(v) is accumulated here
    for (auto i = 4; i >= 0; i--)
      if (v & b[i]) {
        v >>= s[i];
        r |= s[i];
      }
    return r;
  }
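  // Worked example (added comment): ilog2 computes floor(log2(v)) by a binary
  // search over bit masks. For v = 1000 (0b1111101000) the 0xFF00 test fires
  // (v >>= 8 gives 3, r = 8), 0xF0 and 0xC do not, and the 0x2 test fires
  // (v >>= 1 gives 1, r = 9), so ilog2(1000) == 9.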
  static constexpr int32_t ctNOnes() { return ONES; }
  constexpr auto totOnes() const { return off.capacity(); }
  constexpr auto nOnes() const { return totOnes() - 1; }
  static constexpr int32_t ctCapacity() { return SIZE; }
  constexpr auto capacity() const { return content.capacity(); }

  __host__ __device__ void initStorage(View view) {
    assert(view.assoc == this);
    if constexpr (ctCapacity() < 0) {
      assert(view.contentStorage);
      assert(view.contentSize > 0);
      content.init(view.contentStorage, view.contentSize);
    }
    if constexpr (ctNOnes() < 0) {
      assert(view.offStorage);
      assert(view.offSize > 0);
      off.init(view.offStorage, view.offSize);
    }
  }

  __host__ __device__ __forceinline__ void add(CountersOnly const &co) {
    for (uint32_t i = 0; i < totOnes(); ++i) {
#ifdef __CUDA_ARCH__
      atomicAdd(&off[i], co.off[i]);
#else
      auto &a = (std::atomic<Counter> &)(off[i]);
      a += co.off[i];
#endif
    }
  }

  static __host__ __device__ __forceinline__ uint32_t atomicIncrement(Counter &x) {
#ifdef __CUDA_ARCH__
    return atomicAdd(&x, 1);
#else
    auto &a = (std::atomic<Counter> &)(x);
    return a++;
#endif
  }

  static __host__ __device__ __forceinline__ uint32_t atomicDecrement(Counter &x) {
#ifdef __CUDA_ARCH__
    return atomicSub(&x, 1);
#else
    auto &a = (std::atomic<Counter> &)(x);
    return a--;
#endif
  }

  __host__ __device__ __forceinline__ void count(I b) {
    assert(b < nOnes());
    atomicIncrement(off[b]);
  }

  __host__ __device__ __forceinline__ void fill(I b, index_type j) {
    assert(b < nOnes());
    auto w = atomicDecrement(off[b]);
    assert(w > 0);
    content[w - 1] = j;
  }
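  // Note on fill() (added): after launchFinalize, off[b] holds the *end* offset
  // of group b. Each fill(b, j) claims slot off[b] - 1 and moves the counter
  // down, so once all elements are placed off[b] has become the *begin* offset.
  // E.g. counts {2, 1} scan to off = {2, 3, 3}; two fill(0, .) calls write
  // slots 1 and 0, one fill(1, .) writes slot 2, leaving off = {0, 2, 3}.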
  __host__ __device__ __forceinline__ int32_t bulkFill(AtomicPairCounter &apc, index_type const *v, uint32_t n) {
    auto c = apc.add(n);
    if (int(c.m) >= nOnes())
      return -int32_t(c.m);  // overflow: report the failing "one" as a negative index
    off[c.m] = c.n;
    for (uint32_t j = 0; j < n; ++j)
      content[c.n + j] = v[j];
    return c.m;
  }

  __host__ __device__ __forceinline__ void bulkFinalize(AtomicPairCounter const &apc) {
    off[apc.get().m] = apc.get().n;
  }

  __device__ __forceinline__ void bulkFinalizeFill(AtomicPairCounter const &apc) {
    int m = apc.get().m;
    auto n = apc.get().n;
    if (m >= nOnes()) {  // overflow!
      off[nOnes()] = uint32_t(off[nOnes() - 1]);
      return;
    }
    auto first = m + blockDim.x * blockIdx.x + threadIdx.x;
    for (int i = first; i < totOnes(); i += gridDim.x * blockDim.x) {
      off[i] = n;
    }
  }
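  // Note (added; this describes AtomicPairCounter as used here): apc.add(n)
  // atomically reserves one group slot and n content slots in a single call,
  // returning the pair {m = group index, n = first content offset}, which is
  // why off[c.m] = c.n records the group start directly.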
  constexpr auto size() const { return uint32_t(off[totOnes() - 1]); }
  constexpr auto size(uint32_t b) const { return off[b + 1] - off[b]; }

  constexpr index_type const *begin() const { return content.data(); }
  constexpr index_type const *end() const { return begin() + size(); }
  constexpr index_type const *begin(uint32_t b) const { return content.data() + off[b]; }
  constexpr index_type const *end(uint32_t b) const { return begin(b) + size(b); }

  FlexiStorage<Counter, ONES> off;  // off[b]..off[b + 1] delimits the elements of "one" b
  int32_t psws;                     // prefix-scan working space
  FlexiStorage<index_type, SIZE> content;
};
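// Reading sketch (added; `assoc` and `b` are illustrative): after the fill
// phase, group b spans content[off[b] .. off[b + 1]).
//
//   for (auto const *p = assoc.begin(b); p != assoc.end(b); ++p) {
//     auto idx = *p;  // index into the original value collection
//   }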
#endif  // HeterogeneousCore_CUDAUtilities_interface_OneToManyAssoc_h