CMS 3D CMS Logo

gpuClusterChargeCut.h
Go to the documentation of this file.
1 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
2 #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
3 
4 #include <cstdint>
5 #include <cstdio>
6 
11 
12 // local include(s)
14 
// NOTE(review): this file is a Doxygen HTML extraction; the leading per-line
// numbers are part of the captured text, and every jump in that numbering
// (e.g. 18-19, 28, 34, 37-41, 69, 80, 82, 85, 94?, 105) marks source lines that
// were LOST in extraction. Code below is left byte-identical; only comments added.
15 namespace gpuClustering {
16 
 // Kernel: applies a per-layer charge cut to previously-found pixel clusters.
 // For each module it sums the ADC charge of every cluster, invalidates clusters
 // below the threshold (their pixels get invalidModuleId), and renumbers the
 // surviving clusters compactly using a block-wide prefix scan.
 // Launch layout (inferred from the `module += gridDim.x` loop below): a 1-D grid,
 // one block working on one module at a time, striding over modules.
 17  template <typename TrackerTraits>
 // NOTE(review): the `__global__ void clusterChargeCut(` line and the TYPE of the
 // first parameter are missing here (doc lines 18-19). `clusterThresholds` is only
 // used below via getThresholdForLayerOnCondition() -- confirm its declared type
 // against the upstream file before editing.
 20  clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers)
 21  uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster)
 22  uint16_t const* __restrict__ adc, // charge of each pixel
 23  uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module
 24  uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module
 25  uint32_t const* __restrict__ moduleId, // module id of each module
 26  int32_t* __restrict__ clusterId, // modified: cluster id of each pixel
 27  uint32_t numElements) {
 // NOTE(review): doc line 28 is missing -- presumably the constexpr definition of
 // maxNumClustersPerModules (used to size the shared arrays below); TODO confirm.
 29 
 // Per-block scratch, indexed by (per-module) cluster id:
 //  charge    -- accumulated ADC sum of each cluster (zeroed below, before use)
 //  ok        -- 1 if the cluster passes the charge cut, 0 if it is to be dropped
 //  newclusId -- seed/result of the prefix scan used to renumber survivors
 30  __shared__ int32_t charge[maxNumClustersPerModules];
 31  __shared__ uint8_t ok[maxNumClustersPerModules];
 32  __shared__ uint16_t newclusId[maxNumClustersPerModules];
 33 
 // nMaxModules is only consumed by the assert below, hence [[maybe_unused]].
 35  [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules;
 36 
 // NOTE(review): doc lines 37-41 are missing -- they should define at least
 // firstModule, endModule and (presumably) startBPIX2, all used below; TODO
 // confirm their exact definitions upstream.
 39 
 // One block per module; the grid strides over all modules.
 42  for (auto module = firstModule; module < endModule; module += gridDim.x) {
 // moduleStart[0] appears reserved (indexing starts at 1 + module).
 43  auto firstPixel = moduleStart[1 + module];
 44  auto thisModuleId = id[firstPixel];
 45  while (thisModuleId == invalidModuleId and firstPixel < numElements) {
 46  // skip invalid or duplicate pixels
 47  ++firstPixel;
 48  thisModuleId = id[firstPixel];
 49  }
 50  if (firstPixel >= numElements) {
 51  // reached the end of the input while skipping the invalid pixels, nothing left to do
 52  break;
 53  }
 54  if (thisModuleId != moduleId[module]) {
 55  // reached the end of the module while skipping the invalid pixels, skip this module
 56  continue;
 57  }
 58  assert(thisModuleId < nMaxModules);
 59 
 // nclus is uniform across the block (same module), so the `continue` below is
 // taken by all threads together and does not break barrier uniformity.
 60  auto nclus = nClustersInModule[thisModuleId];
 61  if (nclus == 0)
 62  continue;
 63 
 // Diagnostic only: shared arrays cannot hold more than maxNumClustersPerModules.
 // NOTE(review): the closing printf argument (doc line 69, presumably
 // maxNumClustersPerModules) is missing from this extraction.
 64  if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
 65  printf("Warning too many clusters in module %d in block %d: %d > %d\n",
 66  thisModuleId,
 67  blockIdx.x,
 68  nclus,
 70 
 // First pixel this thread handles; threads stride the module by blockDim.x.
 71  auto first = firstPixel + threadIdx.x;
 72 
 73  if (nclus > maxNumClustersPerModules) {
 74  // remove excess FIXME find a way to cut charge first....
 75  for (auto i = first; i < numElements; i += blockDim.x) {
 76  if (id[i] == invalidModuleId)
 77  continue; // not valid
 78  if (id[i] != thisModuleId)
 79  break; // end of module
 // NOTE(review): doc line 80 (almost certainly a condition selecting only the
 // excess clusters, e.g. clusterId[i] >= maxNumClustersPerModules) and doc
 // lines 82/85 (likely clusterId invalidation and the clamp of nclus) are
 // missing -- the invalidation below is NOT unconditional in the real source;
 // TODO confirm upstream.
 81  id[i] = invalidModuleId;
 83  }
 84  }
 86  }
 87 
 88 #ifdef GPU_DEBUG
 89  if (thisModuleId % 100 == 1)
 90  if (threadIdx.x == 0)
 91  printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
 92 #endif
 93 
 // Zero the per-cluster charge accumulators before summing.
 95  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
 96  charge[i] = 0;
 97  }
 98  __syncthreads();
 99 
 // Accumulate each cluster's total charge over its pixels.
 // NOTE(review): the loop body (doc line 105) is missing -- presumably
 // atomicAdd(&charge[clusterId[i]], adc[i]); TODO confirm upstream.
 100  for (auto i = first; i < numElements; i += blockDim.x) {
 101  if (id[i] == invalidModuleId)
 102  continue; // not valid
 103  if (id[i] != thisModuleId)
 104  break; // end of module
 106  }
 107  __syncthreads();
 108 
 // Layer-1 barrel modules (id < startBPIX2) get a different threshold than the rest.
 109  auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
 110 
 // Flag each cluster: ok[i]=1 keeps it, 0 drops it. newclusId is seeded with the
 // same 0/1 values so the prefix scan below yields the compact new numbering.
 111  bool good = true;
 112  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
 113  newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
 114  if (0 == ok[i])
 115  good = false;
 116  }
 117 
 118  // if all clusters above threshold do nothing
 // __syncthreads_and is a barrier that returns the block-wide AND of `good`,
 // so all threads take this `continue` together.
 119  if (__syncthreads_and(good))
 120  continue;
 121 
 122  // renumber
 // ws: scratch for the block prefix scan (one slot per warp of a 1024-thread block).
 123  __shared__ uint16_t ws[32];
 124  constexpr auto maxThreads = 1024;
 125  auto minClust = nclus > maxThreads ? maxThreads : nclus;
 126 
 // Inclusive prefix scan of the keep-flags: newclusId[i] becomes the count of
 // surviving clusters with index <= i.
 127  cms::cuda::blockPrefixScan(newclusId, newclusId, minClust, ws);
 // When more clusters than threads are possible, scan each maxThreads-sized
 // chunk and then add the running total from the end of the previous chunk.
 128  if constexpr (maxNumClustersPerModules > maxThreads) //only if needed
 129  {
 130  for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
 131  cms::cuda::blockPrefixScan(newclusId + offset, newclusId + offset, nclus - offset, ws);
 132 
 133  for (uint32_t i = threadIdx.x + offset; i < nclus; i += blockDim.x) {
 // Last element of the chunk preceding i carries its cumulative total.
 134  uint32_t prevBlockEnd = ((i / maxThreads) * maxThreads) - 1;
 135  newclusId[i] += newclusId[prevBlockEnd];
 136  }
 137 
 138  __syncthreads();
 139  }
 140  }
 // Final scan value = number of surviving clusters; must have shrunk since at
 // least one cluster failed the cut (we did not `continue` above).
 141  assert(nclus > newclusId[nclus - 1]);
 142 
 143  nClustersInModule[thisModuleId] = newclusId[nclus - 1];
 144 
 145  // reassign id
 146  for (auto i = first; i < numElements; i += blockDim.x) {
 147  if (id[i] == invalidModuleId)
 148  continue; // not valid
 149  if (id[i] != thisModuleId)
 150  break; // end of module
 // Pixels of dropped clusters are invalidated; survivors get their compact
 // id (inclusive-scan value minus one, i.e. zero-based).
 151  if (0 == ok[clusterId[i]])
 152  clusterId[i] = id[i] = invalidModuleId;
 153  else
 154  clusterId[i] = newclusId[clusterId[i]] - 1;
 155  }
 156 
 157  //done
 // Barrier before the next module reuses the shared arrays.
 158  __syncthreads();
 159  } // loop on modules
 160  }
 161 
 162 } // namespace gpuClustering
163 
164 #endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
const dim3 threadIdx
Definition: cudaCompat.h:29
__shared__ uint8_t ok[maxNumClustersPerModules]
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ nClustersInModule
constexpr int nMaxModules
const dim3 gridDim
Definition: cudaCompat.h:33
#define __global__
Definition: cudaCompat.h:19
__shared__ int32_t charge[maxNumClustersPerModules]
const dim3 blockDim
Definition: cudaCompat.h:30
float clusterChargeCut(const edm::ParameterSet &conf, const char *name="clusterChargeCut")
chargeCut
Definition: DMR_cfg.py:159
constexpr uint16_t numberOfModules
constexpr int32_t getThresholdForLayerOnCondition(bool isLayer1) const noexcept
auto &__restrict__ ws
constexpr uint32_t maxNumClustersPerModules
assert(nMaxModules< maxNumModules)
constexpr int startBPIX2
constexpr uint16_t maxNumModules
const dim3 blockIdx
Definition: cudaCompat.h:32
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ clusterId
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
__shared__ uint16_t newclusId[maxNumClustersPerModules]
void __syncthreads()
Definition: cudaCompat.h:132
static constexpr uint32_t layerStart[numberOfLayers+1]
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:135
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
uint16_t *__restrict__ uint16_t const *__restrict__ adc