CMS 3D CMS Logo

HistoContainer.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
2 #define HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
3 
5 
6 namespace cms {
7  namespace cuda {
8 
9  template <typename Histo, typename T>
10  __global__ void countFromVector(Histo *__restrict__ h,
11  uint32_t nh,
12  T const *__restrict__ v,
13  uint32_t const *__restrict__ offsets) {
14  int first = blockDim.x * blockIdx.x + threadIdx.x;
15  for (int i = first, nt = offsets[nh]; i < nt; i += gridDim.x * blockDim.x) {
16  auto off = cuda_std::upper_bound(offsets, offsets + nh + 1, i);
17  assert((*off) > 0);
18  int32_t ih = off - offsets - 1;
19  assert(ih >= 0);
20  assert(ih < int(nh));
21  (*h).count(v[i], ih);
22  }
23  }
24 
25  template <typename Histo, typename T>
26  __global__ void fillFromVector(Histo *__restrict__ h,
27  uint32_t nh,
28  T const *__restrict__ v,
29  uint32_t const *__restrict__ offsets) {
30  int first = blockDim.x * blockIdx.x + threadIdx.x;
31  for (int i = first, nt = offsets[nh]; i < nt; i += gridDim.x * blockDim.x) {
32  auto off = cuda_std::upper_bound(offsets, offsets + nh + 1, i);
33  assert((*off) > 0);
34  int32_t ih = off - offsets - 1;
35  assert(ih >= 0);
36  assert(ih < int(nh));
37  (*h).fill(v[i], i, ih);
38  }
39  }
40 
41  template <typename Histo, typename T>
42  inline __attribute__((always_inline)) void fillManyFromVector(Histo *__restrict__ h,
43  uint32_t nh,
44  T const *__restrict__ v,
45  uint32_t const *__restrict__ offsets,
46  int32_t totSize,
47  int nthreads,
48  typename Histo::index_type *mem,
49  cudaStream_t stream
50 #ifndef __CUDACC__
51  = cudaStreamDefault
52 #endif
53  ) {
54  typename Histo::View view = {h, nullptr, mem, -1, totSize};
55  launchZero(view, stream);
56 #ifdef __CUDACC__
57  auto nblocks = (totSize + nthreads - 1) / nthreads;
58  assert(nblocks > 0);
59  countFromVector<<<nblocks, nthreads, 0, stream>>>(h, nh, v, offsets);
60  cudaCheck(cudaGetLastError());
61  launchFinalize(view, stream);
62  fillFromVector<<<nblocks, nthreads, 0, stream>>>(h, nh, v, offsets);
63  cudaCheck(cudaGetLastError());
64 #else
65  countFromVector(h, nh, v, offsets);
66  h->finalize();
67  fillFromVector(h, nh, v, offsets);
68 #endif
69  }
70 
71  // iteratate over N bins left and right of the one containing "v"
72  template <typename Hist, typename V, typename Func>
73  __host__ __device__ __forceinline__ void forEachInBins(Hist const &hist, V value, int n, Func func) {
74  int bs = Hist::bin(value);
75  int be = std::min(int(Hist::nbins() - 1), bs + n);
76  bs = std::max(0, bs - n);
77  assert(be >= bs);
78  for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
79  func(*pj);
80  }
81  }
82 
83  // iteratate over bins containing all values in window wmin, wmax
84  template <typename Hist, typename V, typename Func>
85  __host__ __device__ __forceinline__ void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
86  auto bs = Hist::bin(wmin);
87  auto be = Hist::bin(wmax);
88  assert(be >= bs);
89  for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
90  func(*pj);
91  }
92  }
93 
94  template <typename T, // the type of the discretized input values
95  uint32_t NBINS, // number of bins
96  int32_t SIZE, // max number of element. If -1 is initialized at runtime using external storage
97  uint32_t S = sizeof(T) * 8, // number of significant bits in T
98  typename I = uint32_t, // type stored in the container (usually an index in a vector of the input values)
99  uint32_t NHISTS = 1 // number of histos stored
100  >
102  public:
104  using View = typename Base::View;
105  using Counter = typename Base::Counter;
106  using index_type = typename Base::index_type;
107  using UT = typename std::make_unsigned<T>::type;
108 
109  static constexpr uint32_t ilog2(uint32_t v) {
110  constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
111  constexpr uint32_t s[] = {1, 2, 4, 8, 16};
112 
113  uint32_t r = 0; // result of log2(v) will go here
114  for (auto i = 4; i >= 0; i--)
115  if (v & b[i]) {
116  v >>= s[i];
117  r |= s[i];
118  }
119  return r;
120  }
121 
122  static constexpr uint32_t sizeT() { return S; }
123  static constexpr uint32_t nbins() { return NBINS; }
124  static constexpr uint32_t nhists() { return NHISTS; }
125  static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; }
126  static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; }
127 
128  // static_assert(int32_t(totbins())==Base::ctNOnes());
129 
130  static constexpr auto histOff(uint32_t nh) { return NBINS * nh; }
131 
132  static constexpr UT bin(T t) {
133  constexpr uint32_t shift = sizeT() - nbits();
134  constexpr uint32_t mask = (1 << nbits()) - 1;
135  return (t >> shift) & mask;
136  }
137 
139  uint32_t b = bin(t);
140  assert(b < nbins());
141  Base::atomicIncrement(this->off[b]);
142  }
143 
145  uint32_t b = bin(t);
146  assert(b < nbins());
147  auto w = Base::atomicDecrement(this->off[b]);
148  assert(w > 0);
149  this->content[w - 1] = j;
150  }
151 
153  uint32_t b = bin(t);
154  assert(b < nbins());
155  b += histOff(nh);
156  assert(b < totbins());
157  Base::atomicIncrement(this->off[b]);
158  }
159 
161  uint32_t b = bin(t);
162  assert(b < nbins());
163  b += histOff(nh);
164  assert(b < totbins());
165  auto w = Base::atomicDecrement(this->off[b]);
166  assert(w > 0);
167  this->content[w - 1] = j;
168  }
169  };
170 
171  } // namespace cuda
172 } // namespace cms
173 
174 #endif // HeterogeneousCore_CUDAUtilities_interface_HistoContainer_h
cms::cuda::wmax
__host__ __device__ V V wmax
Definition: HistoContainer.h:85
mps_fire.i
i
Definition: mps_fire.py:428
align::Counter
std::function< unsigned int(align::ID)> Counter
Definition: AlignableIndexer.h:31
NBINS
const int NBINS
Definition: CaloCachedShapeIntegrator.cc:3
cms::cuda::HistoContainer::ilog2
static constexpr uint32_t ilog2(uint32_t v)
Definition: HistoContainer.h:109
sistrip::View
View
Definition: ConstantsForView.h:26
cms::cuda::offsets
uint32_t const T *__restrict__ const uint32_t *__restrict__ offsets
Definition: HistoContainer.h:13
nt
int nt
Definition: AMPTWrapper.h:42
min
T min(T a, T b)
Definition: MathUtil.h:58
cms::cuda::HistoContainer::View
typename Base::View View
Definition: HistoContainer.h:104
cms::cuda::stream
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51
cms::cuda::assert
assert(be >=bs)
h
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
cms::cuda::OneToManyAssoc< I, NHISTS *NBINS+1, SIZE >
__global__
#define __global__
Definition: cudaCompat.h:19
cms::cuda::__attribute__
__attribute__((always_inline)) void countFromVector(Histo *__restrict__ h
cms::cuda::HistoContainer::index_type
typename Base::index_type index_type
Definition: HistoContainer.h:106
alignCSCRings.s
s
Definition: alignCSCRings.py:92
cms::cuda::bs
bs
Definition: HistoContainer.h:76
h
cms::cuda::HistoContainer::nhists
static constexpr uint32_t nhists()
Definition: HistoContainer.h:124
Exhume::I
const std::complex< double > I
Definition: I.h:8
cuda_std::upper_bound
__host__ constexpr __device__ RandomIt upper_bound(RandomIt first, RandomIt last, const T &value, Compare comp={})
Definition: cudastdAlgorithm.h:45
OneToManyAssoc.h
w
const double w
Definition: UKUtility.cc:23
cms::cuda::HistoContainer::Counter
typename Base::Counter Counter
Definition: HistoContainer.h:105
gpuVertexFinder::Hist
cms::cuda::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
Definition: gpuClusterTracksDBSCAN.h:47
cms::cuda::func
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int Func func
Definition: HistoContainer.h:73
cms::cuda::n
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int n
Definition: HistoContainer.h:73
LaserClient_cfi.nbins
nbins
Definition: LaserClient_cfi.py:51
submitPVResolutionJobs.count
count
Definition: submitPVResolutionJobs.py:352
cms::cuda::HistoContainer::nbits
static constexpr uint32_t nbits()
Definition: HistoContainer.h:126
b
double b
Definition: hdecay.h:118
first
auto first
Definition: CAHitNtupletGeneratorKernelsImpl.h:125
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
S
double S(const TLorentzVector &, const TLorentzVector &)
Definition: Particle.cc:97
cms::cuda::HistoContainer::sizeT
static constexpr uint32_t sizeT()
Definition: HistoContainer.h:122
ntuplemaker.fill
fill
Definition: ntuplemaker.py:304
Skims_PA_cff.content
content
Definition: Skims_PA_cff.py:19
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
gpuVertexFinder::hist
__shared__ Hist hist
Definition: gpuClusterTracksDBSCAN.h:48
cms::cuda::V
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int Histo::index_type cudaStream_t V
Definition: HistoContainer.h:51
gainCalibHelper::gainCalibPI::type
type
Definition: SiPixelGainCalibHelper.h:40
cms::cuda::nthreads
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int nthreads
Definition: HistoContainer.h:43
cms::cuda::wmin
__host__ __device__ V wmin
Definition: HistoContainer.h:85
value
Definition: value.py:1
cms::cuda::HistoContainer::UT
typename std::make_unsigned< T >::type UT
Definition: HistoContainer.h:107
__device__
#define __device__
Definition: SiPixelGainForHLTonGPU.h:15
cms::cuda::v
uint32_t const T *__restrict__ v
Definition: HistoContainer.h:11
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
__forceinline__
#define __forceinline__
Definition: cudaCompat.h:22
alignCSCRings.r
r
Definition: alignCSCRings.py:93
cms::cuda::nh
uint32_t nh
Definition: HistoContainer.h:11
newFWLiteAna.bin
bin
Definition: newFWLiteAna.py:161
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
cms::cuda::HistoContainer::nbins
static constexpr uint32_t nbins()
Definition: HistoContainer.h:123
edm::shift
static unsigned const int shift
Definition: LuminosityBlockID.cc:7
cms::cuda::mem
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t int Histo::index_type * mem
Definition: HistoContainer.h:43
T
long double T
Definition: Basic3DVectorLD.h:48
cms::cuda::totSize
uint32_t const T *__restrict__ const uint32_t *__restrict__ int32_t totSize
Definition: HistoContainer.h:43
cms::cuda::HistoContainer::bin
static constexpr UT bin(T t)
Definition: HistoContainer.h:132
ecalDigis_cff.cuda
cuda
Definition: ecalDigis_cff.py:35
S
Definition: CSCDBL1TPParametersExtended.h:16
cms::cuda::OneToManyAssoc< I, NHISTS *NBINS+1, SIZE >::index_type
I index_type
Definition: OneToManyAssoc.h:150
cms::cuda::be
int be
Definition: HistoContainer.h:75
cms::cuda::HistoContainer
Definition: HistoContainer.h:101
trklet::ilog2
int ilog2(double factor)
Definition: Util.h:106
dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66
cms::cuda::HistoContainer::totbins
static constexpr uint32_t totbins()
Definition: HistoContainer.h:125
submitPVValidationJobs.t
string t
Definition: submitPVValidationJobs.py:644
__host__
#define __host__
Definition: SiPixelGainForHLTonGPU.h:12
cms::cuda::HistoContainer::histOff
static constexpr auto histOff(uint32_t nh)
Definition: HistoContainer.h:130
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32