
gpuClusterChargeCut.h
#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h

#include <cstdint>
#include <cstdio>

// CMSSW headers providing invalidModuleId / maxNumModules, the device assert,
// blockPrefixScan, and the SiPixelClusterThresholds type used below
#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h"
#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h"
namespace gpuClustering {

  template <typename TrackerTraits>
  __global__ void clusterChargeCut(
      SiPixelClusterThresholds
          clusterThresholds,  // charge cut on cluster in electrons (for layer 1 and for other layers)
      uint16_t* __restrict__ id,                 // module id of each pixel (modified if bad cluster)
      uint16_t const* __restrict__ adc,          // charge of each pixel
      uint32_t const* __restrict__ moduleStart,  // index of the first pixel of each module
      uint32_t* __restrict__ nClustersInModule,  // modified: number of clusters found in each module
      uint32_t const* __restrict__ moduleId,     // module id of each module
      int32_t* __restrict__ clusterId,           // modified: cluster id of each pixel
      uint32_t numElements) {
    constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;
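    // One thread block works on one module at a time: it sums each cluster's
    // charge in shared memory, flags the clusters below the layer-dependent
    // threshold, and renumbers the survivors into a compact id range.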
    __shared__ int32_t charge[maxNumClustersPerModules];
    __shared__ uint8_t ok[maxNumClustersPerModules];
    __shared__ uint16_t newclusId[maxNumClustersPerModules];

    // modules with id below startBPIX2 belong to barrel layer 1
    constexpr int startBPIX2 = TrackerTraits::layerStart[1];

    [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules;
    assert(nMaxModules < maxNumModules);

    auto firstModule = blockIdx.x;
    auto endModule = moduleStart[0];  // moduleStart[0] holds the number of modules
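    // Grid-stride loop over modules: block blockIdx.x handles modules
    // blockIdx.x, blockIdx.x + gridDim.x, ... so any grid size covers them all.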
    for (auto module = firstModule; module < endModule; module += gridDim.x) {
      auto firstPixel = moduleStart[1 + module];
      auto thisModuleId = id[firstPixel];
      while (thisModuleId == invalidModuleId and firstPixel < numElements) {
        // skip invalid or duplicate pixels
        ++firstPixel;
        thisModuleId = id[firstPixel];
      }
      if (firstPixel >= numElements) {
        // reached the end of the input while skipping the invalid pixels, nothing left to do
        break;
      }
      if (thisModuleId != moduleId[module]) {
        // reached the end of the module while skipping the invalid pixels, skip this module
        continue;
      }
      assert(thisModuleId < nMaxModules);

      auto nclus = nClustersInModule[thisModuleId];
      if (nclus == 0)
        continue;

      if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
        printf("Warning too many clusters in module %d in block %d: %d > %d\n",
               thisModuleId,
               blockIdx.x,
               nclus,
               maxNumClustersPerModules);
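      // Each thread starts at its own pixel and strides over the module's pixels;
      // if the module overflowed the shared arrays, the excess clusters are
      // dropped wholesale below rather than selected by charge.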
      auto first = firstPixel + threadIdx.x;

      if (nclus > maxNumClustersPerModules) {
        // remove the excess clusters  FIXME: find a way to cut on charge first....
        for (auto i = first; i < numElements; i += blockDim.x) {
          if (id[i] == invalidModuleId)
            continue;  // not valid
          if (id[i] != thisModuleId)
            break;  // end of module
          if (clusterId[i] >= maxNumClustersPerModules) {
            id[i] = invalidModuleId;
            clusterId[i] = invalidModuleId;
          }
        }
        nclus = maxNumClustersPerModules;
      }

#ifdef GPU_DEBUG
      if (thisModuleId % 100 == 1)
        if (threadIdx.x == 0)
          printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif

      assert(nclus <= maxNumClustersPerModules);
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        charge[i] = 0;
      }
      __syncthreads();

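      // Accumulate each valid pixel's ADC count into its cluster's total;
      // atomicAdd is required because many threads can hit the same cluster.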
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not valid
        if (id[i] != thisModuleId)
          break;  // end of module
        atomicAdd(&charge[clusterId[i]], adc[i]);
      }
      __syncthreads();

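      // Select the charge threshold: modules below startBPIX2 are barrel layer 1,
      // which has its own dedicated cut.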
      auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);

      bool good = true;
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
        if (0 == ok[i])
          good = false;
      }

      // if all clusters are above threshold, do nothing
      if (__syncthreads_and(good))
        continue;

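      // Compact the surviving clusters: an inclusive block-wide prefix scan over
      // the 0/1 flags in newclusId yields each survivor's new id (offset by one).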
      // renumber
      __shared__ uint16_t ws[32];
      constexpr auto maxThreads = 1024;
      auto minClust = nclus > maxThreads ? maxThreads : nclus;

      blockPrefixScan(newclusId, newclusId, minClust, ws);
      if constexpr (maxNumClustersPerModules > maxThreads)  // only if needed
      {
        for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
          blockPrefixScan(newclusId + offset, newclusId + offset, nclus - offset, ws);

          for (uint32_t i = threadIdx.x + offset; i < nclus; i += blockDim.x) {
            uint32_t prevBlockEnd = ((i / maxThreads) * maxThreads) - 1;
            newclusId[i] += newclusId[prevBlockEnd];
          }

          __syncthreads();
        }
      }
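      // After the scan, newclusId[nclus - 1] is the number of surviving clusters.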
      assert(nclus > newclusId[nclus - 1]);

      nClustersInModule[thisModuleId] = newclusId[nclus - 1];

      // reassign id
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not valid
        if (id[i] != thisModuleId)
          break;  // end of module
        if (0 == ok[clusterId[i]])
          clusterId[i] = id[i] = invalidModuleId;
        else
          clusterId[i] = newclusId[clusterId[i]] - 1;
      }

      // done
      __syncthreads();
    }  // loop on modules
  }

}  // namespace gpuClustering

#endif  // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
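
For orientation, here is a minimal launch sketch. It is an illustration under stated assumptions, not code from this file: the pixelTopology::Phase1 traits, the _d buffer names, the stream, and the cudaCheck helper are stand-ins for whatever the calling clusterizer actually uses; the block count only needs to be large enough that the grid-stride loop visits every module.

// Hypothetical launch sketch (not part of this file). Buffers ending in _d are
// assumed to be device pointers already filled by the clustering kernels.
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

void launchClusterChargeCut(SiPixelClusterThresholds thresholds,
                            uint16_t* id_d,
                            uint16_t const* adc_d,
                            uint32_t const* moduleStart_d,
                            uint32_t* nClustersInModule_d,
                            uint32_t const* moduleId_d,
                            int32_t* clusterId_d,
                            uint32_t numElements,
                            cudaStream_t stream) {
  using Traits = pixelTopology::Phase1;   // assumption: Phase-1 pixel topology
  int threads = 256;                      // illustrative block size
  int blocks = Traits::numberOfModules;   // the grid-stride loop tolerates any grid
  gpuClustering::clusterChargeCut<Traits><<<blocks, threads, 0, stream>>>(
      thresholds, id_d, adc_d, moduleStart_d, nClustersInModule_d, moduleId_d, clusterId_d, numElements);
  cudaCheck(cudaGetLastError());          // assumption: CMSSW's cudaCheck macro
}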