1 #ifndef HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
2 #define HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
9 template <
typename Histo,
typename T>
10 __global__ void countFromVector(Histo *__restrict__
h,
12 T const *__restrict__
v,
18 int32_t ih = off - offsets - 1;
25 template <
typename Histo,
typename T>
26 __global__ void fillFromVector(Histo *__restrict__ h,
28 T const *__restrict__ v,
29 uint32_t
const *__restrict__ offsets) {
34 int32_t ih = off - offsets - 1;
37 (*h).fill(v[
i], i, ih);
41 template <
typename Histo,
typename T>
42 inline __attribute__((always_inline)) void fillManyFromVector(Histo *__restrict__ h,
45 uint32_t
const *__restrict__ offsets,
48 typename Histo::index_type *
mem,
57 auto nblocks = (totSize + nthreads - 1) / nthreads;
59 countFromVector<<<nblocks, nthreads, 0, stream>>>(
h,
nh,
v,
offsets);
61 launchFinalize(view,
stream);
62 fillFromVector<<<nblocks, nthreads, 0, stream>>>(
h,
nh,
v,
offsets);
65 countFromVector(h, nh, v, offsets);
67 fillFromVector(h, nh, v, offsets);
72 template <
typename Hist,
typename V,
typename Func>
78 for (
auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
84 template <
typename Hist,
typename V,
typename Func>
89 for (
auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
97 uint32_t
S =
sizeof(
T) * 8,
98 typename I = uint32_t,
109 static constexpr uint32_t
ilog2(uint32_t v) {
110 constexpr uint32_t
b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
111 constexpr uint32_t
s[] = {1, 2, 4, 8, 16};
114 for (
auto i = 4;
i >= 0;
i--)
122 static constexpr uint32_t
sizeT() {
return S; }
124 static constexpr uint32_t
nhists() {
return NHISTS; }
133 constexpr uint32_t
shift = sizeT() - nbits();
134 constexpr uint32_t mask = (1 << nbits()) - 1;
135 return (t >> shift) & mask;
141 Base::atomicIncrement(this->off[b]);
147 auto w = Base::atomicDecrement(this->off[b]);
157 Base::atomicIncrement(this->off[b]);
165 auto w = Base::atomicDecrement(this->off[b]);
174 #endif // HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
static constexpr auto histOff(uint32_t nh)
uint32_t T const *__restrict__ v
__host__ __device__ constexpr RandomIt upper_bound(RandomIt first, RandomIt last, const T &value, Compare comp={})
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V value
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int Func func
static constexpr uint32_t ilog2(uint32_t v)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type * mem
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
static constexpr uint32_t nbits()
std::function< unsigned int(align::ID)> Counter
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t V
typename Base::index_type index_type
cms::cuda::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
static constexpr uint32_t nhists()
__attribute__((always_inline)) void countFromVector(Histo *__restrict__ h
const std::complex< double > I
typename Base::Counter Counter
uint32_t T const *__restrict__ uint32_t const *__restrict__ offsets
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int nthreads
static constexpr uint32_t sizeT()
void fill(std::map< std::string, TH1 * > &h, const std::string &s, double x)
static constexpr uint32_t nbins()
static constexpr UT bin(T t)
typename std::make_unsigned< T >::type UT
static constexpr uint32_t totbins()
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int n
static unsigned int const shift
#define cudaCheck(ARG,...)
__host__ __device__ V wmin
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t totSize
__host__ __device__ V V wmax