
gpuClusterChargeCut.h
#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h

#include <cstdint>
#include <cstdio>

#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelClusterThresholds.h"
#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h"

namespace gpuClustering {

  template <typename TrackerTraits>
  __global__ void clusterChargeCut(
      SiPixelClusterThresholds
          clusterThresholds,  // charge cut on cluster in electrons (for layer 1 and for other layers)
      uint16_t* __restrict__ id,                 // module id of each pixel (modified if bad cluster)
      uint16_t const* __restrict__ adc,          // charge of each pixel
      uint32_t const* __restrict__ moduleStart,  // index of the first pixel of each module
      uint32_t* __restrict__ nClustersInModule,  // modified: number of clusters found in each module
      uint32_t const* __restrict__ moduleId,     // module id of each module
      int32_t* __restrict__ clusterId,           // modified: cluster id of each pixel
      uint32_t numElements) {
    constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;

    __shared__ int32_t charge[maxNumClustersPerModules];
    __shared__ uint8_t ok[maxNumClustersPerModules];
    __shared__ uint16_t newclusId[maxNumClustersPerModules];

    constexpr int startBPIX2 = TrackerTraits::layerStart[1];

    assert(TrackerTraits::numberOfModules < maxNumModules);
    assert(startBPIX2 < TrackerTraits::numberOfModules);

    auto firstModule = blockIdx.x;
    auto endModule = moduleStart[0];  // moduleStart[0] holds the number of modules found
    for (auto module = firstModule; module < endModule; module += gridDim.x) {
      auto firstPixel = moduleStart[1 + module];
      auto thisModuleId = id[firstPixel];
      while (thisModuleId == invalidModuleId and firstPixel < numElements) {
        // skip invalid or duplicate pixels
        ++firstPixel;
        thisModuleId = id[firstPixel];
      }
      if (firstPixel >= numElements) {
        // reached the end of the input while skipping the invalid pixels, nothing left to do
        break;
      }
      if (thisModuleId != moduleId[module]) {
        // reached the end of the module while skipping the invalid pixels, skip this module
        continue;
      }
      assert(thisModuleId < TrackerTraits::numberOfModules);

      auto nclus = nClustersInModule[thisModuleId];
      if (nclus == 0)
        continue;

      if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
        printf("Warning too many clusters in module %d in block %d: %d > %d\n",
               thisModuleId,
               blockIdx.x,
               nclus,
               maxNumClustersPerModules);

      auto first = firstPixel + threadIdx.x;

      if (nclus > maxNumClustersPerModules) {
        // remove excess  FIXME find a way to cut charge first....
        for (auto i = first; i < numElements; i += blockDim.x) {
          if (id[i] == invalidModuleId)
            continue;  // not valid
          if (id[i] != thisModuleId)
            break;  // end of module
          if (clusterId[i] >= maxNumClustersPerModules) {
            id[i] = invalidModuleId;
            clusterId[i] = invalidModuleId;
          }
        }
        nclus = maxNumClustersPerModules;
      }

#ifdef GPU_DEBUG
      if (thisModuleId % 100 == 1)
        if (threadIdx.x == 0)
          printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif

      assert(nclus <= maxNumClustersPerModules);
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        charge[i] = 0;
      }
      __syncthreads();

      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not valid
        if (id[i] != thisModuleId)
          break;  // end of module
        atomicAdd(&charge[clusterId[i]], adc[i]);  // accumulate this pixel's charge into its cluster
      }
      __syncthreads();

      auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);

      bool good = true;
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
        if (0 == ok[i])
          good = false;
      }

      // if all clusters above threshold do nothing
      if (__syncthreads_and(good))
        continue;

      // renumber: compact the ids of the surviving clusters with a prefix scan over the keep-flags
      __shared__ uint16_t ws[32];
      constexpr auto maxThreads = 1024;
      auto minClust = nclus > maxThreads ? maxThreads : nclus;

      blockPrefixScan(newclusId, minClust, ws);
      if constexpr (maxNumClustersPerModules > maxThreads)  // only if needed
      {
        for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
          blockPrefixScan(newclusId + offset, nclus - offset, ws);

          for (uint32_t i = threadIdx.x + offset; i < nclus; i += blockDim.x) {
            uint32_t prevBlockEnd = ((i / maxThreads) * maxThreads) - 1;
            newclusId[i] += newclusId[prevBlockEnd];
          }
          __syncthreads();
        }
      }
      assert(nclus > newclusId[nclus - 1]);

      nClustersInModule[thisModuleId] = newclusId[nclus - 1];

      // reassign id
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not valid
        if (id[i] != thisModuleId)
          break;  // end of module
        if (0 == ok[clusterId[i]])
          clusterId[i] = id[i] = invalidModuleId;
        else
          clusterId[i] = newclusId[clusterId[i]] - 1;
      }

      // done
      __syncthreads();
    }  // loop on modules
  }

}  // namespace gpuClustering

#endif  // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
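
The renumber step is a block-wide inclusive prefix scan over the per-cluster keep-flags: the scan value at a kept cluster, minus one, is its new dense id, and the value at the last cluster is the number of survivors written back to nClustersInModule. A rough serial equivalent of that step, as an illustrative sketch only (not CMSSW code; the function name is made up):

#include <cstdint>
#include <vector>

// Serial sketch of the renumbering above: an inclusive prefix sum over the
// keep-flags assigns each surviving cluster a dense id (newclusId[i] - 1).
std::vector<uint16_t> renumber(std::vector<uint8_t> const& ok) {
  std::vector<uint16_t> newclusId(ok.size());
  uint16_t running = 0;
  for (std::size_t i = 0; i < ok.size(); ++i) {
    running += ok[i];        // inclusive prefix sum of the keep-flags (0 or 1)
    newclusId[i] = running;  // a kept cluster i gets new id newclusId[i] - 1
  }
  return newclusId;  // newclusId.back() == number of surviving clusters
}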
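
For context, a launch of this kernel might look like the sketch below. This is not the actual CMSSW call site: the wrapper function, buffer names, block size, and the choice of one block per module are assumptions for illustration (the kernel strides over gridDim.x, so fewer blocks also work); pixelTopology::Phase1 is the Phase-1 traits class from SimplePixelTopology.h.

#include <cstdint>
#include <cuda_runtime.h>

#include "CondFormats/SiPixelObjects/interface/SiPixelClusterThresholds.h"
#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h"

// Hypothetical launch wrapper; all d_* arguments are assumed to be device
// buffers already filled by the upstream clustering kernel.
void launchClusterChargeCut(SiPixelClusterThresholds thresholds,
                            uint16_t* d_id,
                            uint16_t const* d_adc,
                            uint32_t const* d_moduleStart,
                            uint32_t* d_nClustersInModule,
                            uint32_t const* d_moduleId,
                            int32_t* d_clusterId,
                            uint32_t nPixels,
                            cudaStream_t stream) {
  using Traits = pixelTopology::Phase1;
  const int threads = 256;                     // illustrative block size
  const int blocks = Traits::numberOfModules;  // one block per module (kernel strides anyway)
  gpuClustering::clusterChargeCut<Traits><<<blocks, threads, 0, stream>>>(
      thresholds, d_id, d_adc, d_moduleStart, d_nClustersInModule, d_moduleId, d_clusterId, nPixels);
}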