CMS 3D CMS Logo

gpuClusterChargeCut.h
Go to the documentation of this file.
1 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
2 #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
3 
4 #include <cstdint>
5 #include <cstdio>
6 
11 
12 // local include(s)
14 
15 namespace gpuClustering {
16 
17  template <typename TrackerTraits>
20  clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers)
21  uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster)
22  uint16_t const* __restrict__ adc, // charge of each pixel
23  uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module
24  uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module
25  uint32_t const* __restrict__ moduleId, // module id of each module
26  int32_t* __restrict__ clusterId, // modified: cluster id of each pixel
27  uint32_t numElements) {
28  __shared__ int32_t charge[maxNumClustersPerModules];
29  __shared__ uint8_t ok[maxNumClustersPerModules];
30  __shared__ uint16_t newclusId[maxNumClustersPerModules];
31 
33  [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules;
34 
37 
40  for (auto module = firstModule; module < endModule; module += gridDim.x) {
41  auto firstPixel = moduleStart[1 + module];
42  auto thisModuleId = id[firstPixel];
43  while (thisModuleId == invalidModuleId and firstPixel < numElements) {
44  // skip invalid or duplicate pixels
45  ++firstPixel;
46  thisModuleId = id[firstPixel];
47  }
48  if (firstPixel >= numElements) {
49  // reached the end of the input while skipping the invalid pixels, nothing left to do
50  break;
51  }
52  if (thisModuleId != moduleId[module]) {
53  // reached the end of the module while skipping the invalid pixels, skip this module
54  continue;
55  }
56  assert(thisModuleId < nMaxModules);
57 
58  auto nclus = nClustersInModule[thisModuleId];
59  if (nclus == 0)
60  continue;
61 
62  if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
63  printf("Warning too many clusters in module %d in block %d: %d > %d\n",
64  thisModuleId,
65  blockIdx.x,
66  nclus,
68 
69  auto first = firstPixel + threadIdx.x;
70 
71  if (nclus > maxNumClustersPerModules) {
72  // remove excess FIXME find a way to cut charge first....
73  for (auto i = first; i < numElements; i += blockDim.x) {
74  if (id[i] == invalidModuleId)
75  continue; // not valid
76  if (id[i] != thisModuleId)
77  break; // end of module
79  id[i] = invalidModuleId;
81  }
82  }
84  }
85 
86 #ifdef GPU_DEBUG
87  if (thisModuleId % 100 == 1)
88  if (threadIdx.x == 0)
89  printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
90 #endif
91 
93  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
94  charge[i] = 0;
95  }
96  __syncthreads();
97 
98  for (auto i = first; i < numElements; i += blockDim.x) {
99  if (id[i] == invalidModuleId)
100  continue; // not valid
101  if (id[i] != thisModuleId)
102  break; // end of module
104  }
105  __syncthreads();
106 
107  auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
108 
109  bool good = true;
110  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
111  newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
112  if (0 == ok[i])
113  good = false;
114  }
115 
116  // if all clusters above threshold do nothing
117  if (__syncthreads_and(good))
118  continue;
119 
120  // renumber
121  __shared__ uint16_t ws[32];
122  cms::cuda::blockPrefixScan(newclusId, nclus, ws);
123 
124  assert(nclus > newclusId[nclus - 1]);
125 
126  nClustersInModule[thisModuleId] = newclusId[nclus - 1];
127 
128  // reassign id
129  for (auto i = first; i < numElements; i += blockDim.x) {
130  if (id[i] == invalidModuleId)
131  continue; // not valid
132  if (id[i] != thisModuleId)
133  break; // end of module
134  if (0 == ok[clusterId[i]])
135  clusterId[i] = id[i] = invalidModuleId;
136  else
137  clusterId[i] = newclusId[clusterId[i]] - 1;
138  }
139 
140  //done
141  __syncthreads();
142  } // loop on modules
143  }
144 
145 } // namespace gpuClustering
146 
147 #endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
const dim3 threadIdx
Definition: cudaCompat.h:29
__shared__ uint8_t ok[maxNumClustersPerModules]
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ nClustersInModule
constexpr int nMaxModules
const dim3 gridDim
Definition: cudaCompat.h:33
#define __global__
Definition: cudaCompat.h:19
const dim3 blockDim
Definition: cudaCompat.h:30
float clusterChargeCut(const edm::ParameterSet &conf, const char *name="clusterChargeCut")
chargeCut
Definition: DMR_cfg.py:159
constexpr uint16_t numberOfModules
constexpr int32_t getThresholdForLayerOnCondition(bool isLayer1) const noexcept
auto &__restrict__ ws
assert(nMaxModules< maxNumModules)
constexpr int startBPIX2
constexpr int32_t maxNumClustersPerModules
constexpr uint16_t maxNumModules
const dim3 blockIdx
Definition: cudaCompat.h:32
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ clusterId
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
__shared__ uint16_t newclusId[maxNumClustersPerModules]
void __syncthreads()
Definition: cudaCompat.h:132
static constexpr uint32_t layerStart[numberOfLayers+1]
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:135
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
uint16_t *__restrict__ uint16_t const *__restrict__ adc