CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
gpuClusterChargeCut.h
Go to the documentation of this file.
1 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
2 #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
3 
4 #include <cstdint>
5 #include <cstdio>
6 
11 
12 // local include(s)
14 
// NOTE(review): this is a Doxygen scrape; the leading integer on each line is the
// Doxygen listing number, not code.  Gaps in that numbering (listing lines 18-19,
// 32-33, 38, 55, 71, 80) mark lines LOST by the scrape — most visibly the
// `__global__ void clusterChargeCut(...)` signature line itself.  Do not compile
// this text as-is; recover the original header before editing.
15 namespace gpuClustering {
16 
// Kernel (per the visible body): applies a per-cluster charge threshold to the
// clusters already found in each pixel module.  Pixels of clusters below the
// threshold are invalidated (id/clusterId set to invalidModuleId) and the
// surviving clusters of the module are renumbered compactly (0..nNew-1).
// Launch layout (from `module += gridDim.x` below): each block walks modules
// with a grid stride; `firstModule` is presumably blockIdx.x, but the line
// defining it (listing line 38) is missing from this scrape — TODO confirm.
17  template <bool isPhase2>
// NOTE(review): listing lines 18-19 are missing here.  Per the symbol index at
// the bottom of the page they carried `__global__ void clusterChargeCut(` and
// the type of the first parameter (a SiPixelClusterThresholds) — confirm
// against the original header.
20  clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers)
21  uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster)
22  uint16_t const* __restrict__ adc, // charge of each pixel
23  uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module
24  uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module
25  uint32_t const* __restrict__ moduleId, // module id of each module
26  int32_t* __restrict__ clusterId, // modified: cluster id of each pixel
27  uint32_t numElements) {
// Per-block scratch, one slot per (pre-cut) cluster of the module currently
// being processed; reused across loop iterations, so the barrier at the bottom
// of the module loop matters.  Uninitialized until the init loop below.
28  __shared__ int32_t charge[maxNumClustersPerModules];
29  __shared__ uint8_t ok[maxNumClustersPerModules];
30  __shared__ uint16_t newclusId[maxNumClustersPerModules];
31 
// NOTE(review): listing lines 32-33 are missing.  Given the asserts that
// follow, they presumably defined the compile-time `nMaxModules` and
// `startBPIX2` from the isPhase2 template parameter — TODO confirm.
34 
35  assert(nMaxModules < maxNumModules);
36  assert(startBPIX2 < nMaxModules);
37 
// moduleStart[0] holds the module count; moduleStart[1 + m] the first pixel of
// module m (consistent with the parameter comment and the indexing below).
// NOTE(review): the declaration of `firstModule` (listing line 38) is missing.
39  auto endModule = moduleStart[0];
40  for (auto module = firstModule; module < endModule; module += gridDim.x) {
41  auto firstPixel = moduleStart[1 + module];
42  auto thisModuleId = id[firstPixel];
43  assert(thisModuleId < nMaxModules);
44  assert(thisModuleId == moduleId[module]);
45 
// Number of clusters previously found in this module; nothing to cut if zero.
46  auto nclus = nClustersInModule[thisModuleId];
47  if (nclus == 0)
48  continue;
49 
// Overflow diagnostic, printed once per module by thread 0 only.
50  if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
51  printf("Warning too many clusters in module %d in block %d: %d > %d\n",
52  thisModuleId,
53  blockIdx.x,
54  nclus,
// NOTE(review): listing line 55 is missing — presumably the final printf
// argument (maxNumClustersPerModules) and the closing `);` — TODO confirm.
56 
57  auto first = firstPixel + threadIdx.x;
58 
// Overflow handling: drop every pixel whose cluster id does not fit in the
// shared-memory scratch arrays.  Each thread strides over this module's pixels.
59  if (nclus > maxNumClustersPerModules) {
60  // remove excess FIXME find a way to cut charge first....
61  for (auto i = first; i < numElements; i += blockDim.x) {
62  if (id[i] == invalidModuleId)
63  continue; // not valid
64  if (id[i] != thisModuleId)
65  break; // end of module
66  if (clusterId[i] >= maxNumClustersPerModules) {
67  id[i] = invalidModuleId;
68  clusterId[i] = invalidModuleId;
69  }
70  }
// NOTE(review): listing line 71 is missing here — given the write/read ordering
// this was very likely a `__syncthreads();` (and possibly a clamp of nclus);
// TODO confirm against the original header.
72  }
73 
74 #ifdef GPU_DEBUG
75  if (thisModuleId % 100 == 1)
76  if (threadIdx.x == 0)
77  printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
78 #endif
79 
// Zero the per-cluster charge accumulators (shared memory is uninitialized).
// NOTE(review): listing line 80 is missing; probably just a comment.
81  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
82  charge[i] = 0;
83  }
// Barrier: all accumulators must be zeroed before any thread adds to them.
84  __syncthreads();
85 
// Accumulate each pixel's ADC into its cluster's charge.  Pixels of one module
// are contiguous, so a thread can stop at the first pixel of another module.
86  for (auto i = first; i < numElements; i += blockDim.x) {
87  if (id[i] == invalidModuleId)
88  continue; // not valid
89  if (id[i] != thisModuleId)
90  break; // end of module
91  atomicAdd(&charge[clusterId[i]], adc[i]);
92  }
// Barrier: charges must be fully accumulated before they are compared below.
93  __syncthreads();
94 
// Modules with id below startBPIX2 use the layer-1 threshold, others the
// default one (that is what getThresholdForLayerOnCondition's argument encodes).
95  auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
96 
// Flag each cluster: ok[i] = 1 if it passes the cut.  newclusId doubles as the
// input of the prefix scan below.  `good` stays true only if every cluster this
// thread inspected passed.
97  bool good = true;
98  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
99  newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
100  if (0 == ok[i])
101  good = false;
102  }
103 
104  // if all clusters above threshold do nothing
// __syncthreads_and is a full-block barrier that also ANDs `good` across all
// threads — every thread takes the same branch, so the `continue` is safe.
105  if (__syncthreads_and(good))
106  continue;
107 
108  // renumber
// ws is scratch for the block-wide prefix scan; scanning the 0/1 flags in
// newclusId yields, for each surviving cluster, its new compact id (+1).
109  __shared__ uint16_t ws[32];
110  cms::cuda::blockPrefixScan(newclusId, nclus, ws);
111 
// At least one cluster failed the cut (we did not `continue` above), so the
// surviving count must be strictly less than nclus.
112  assert(nclus > newclusId[nclus - 1]);
113 
// The last scan element is the total number of surviving clusters.
114  nClustersInModule[thisModuleId] = newclusId[nclus - 1];
115 
116  // reassign id
// Pixels of rejected clusters are invalidated; the rest get the compacted id
// (scan value is 1-based, hence the -1).
117  for (auto i = first; i < numElements; i += blockDim.x) {
118  if (id[i] == invalidModuleId)
119  continue; // not valid
120  if (id[i] != thisModuleId)
121  break; // end of module
122  if (0 == ok[clusterId[i]])
123  clusterId[i] = id[i] = invalidModuleId;
124  else
125  clusterId[i] = newclusId[clusterId[i]] - 1;
126  }
127 
128  //done
// Barrier before the next module iteration: the shared arrays are reused, so no
// thread may start zeroing them while others still read ok/newclusId above.
129  __syncthreads();
130  } // loop on modules
131  }
132 
133 } // namespace gpuClustering
134 
135 #endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
const dim3 threadIdx
Definition: cudaCompat.h:29
__shared__ uint8_t ok[maxNumClustersPerModules]
constexpr int nMaxModules
const dim3 gridDim
Definition: cudaCompat.h:33
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ clusterId
constexpr uint32_t layerStart[numberOfLayers+1]
#define __global__
Definition: cudaCompat.h:19
const dim3 blockDim
Definition: cudaCompat.h:30
float clusterChargeCut(const edm::ParameterSet &conf, const char *name="clusterChargeCut")
constexpr uint32_t numberOfModules
constexpr int32_t getThresholdForLayerOnCondition(bool isLayer1) const noexcept
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ nClustersInModule
auto &__restrict__ ws
assert(nMaxModules< maxNumModules)
constexpr int startBPIX2
printf("params %d %f %f %f\n", minT, eps, errmax, chi2max)
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
constexpr int32_t maxNumClustersPerModules
constexpr uint16_t maxNumModules
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
const dim3 blockIdx
Definition: cudaCompat.h:32
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
constexpr uint32_t numberOfModules
auto const good
min quality of good
__shared__ uint16_t newclusId[maxNumClustersPerModules]
void __syncthreads()
Definition: cudaCompat.h:108
constexpr uint32_t layerStart[numberOfLayers+1]
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:111
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
tuple module
Definition: callgraph.py:69
uint16_t *__restrict__ uint16_t const *__restrict__ adc