CMS 3D CMS Logo

List of all members | Public Member Functions
pixelClustering::ClusterChargeCut< TrackerTraits > Struct Template Reference

#include <ClusterChargeCut.h>

Public Member Functions

template<typename TAcc >
ALPAKA_FN_ACC void operator() (const TAcc &acc, SiPixelDigisSoAView digi_view, SiPixelClustersSoAView clus_view, SiPixelClusterThresholds clusterThresholds, const uint32_t numElements) const
 

Detailed Description

template<typename TrackerTraits>
struct pixelClustering::ClusterChargeCut< TrackerTraits >

Definition at line 18 of file ClusterChargeCut.h.

Member Function Documentation

◆ operator()()

template<typename TrackerTraits >
template<typename TAcc >
ALPAKA_FN_ACC void pixelClustering::ClusterChargeCut< TrackerTraits >::operator() ( const TAcc &  acc,
SiPixelDigisSoAView  digi_view,
SiPixelClustersSoAView  clus_view,
SiPixelClusterThresholds  clusterThresholds,
const uint32_t  numElements 
) const
inline

Definition at line 20 of file ClusterChargeCut.h.

References gpuClustering::adc, ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_OFFLOAD(), cms::cudacompat::atomicAdd(), cms::cudacompat::blockIdx, cms::alpakatools::blockPrefixScan(), ALCARECOTkAlJpsiMuMu_cff::charge, DMR_cfg::chargeCut, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), cms::alpakatools::element_index_range_in_block(), gpuClustering::endModule, gpuClustering::firstModule, cms::alpakatools::for_each_element_in_block_strided(), SiPixelClusterThresholds::getThresholdForLayerOnCondition(), ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::gridDimension(), mps_fire::i, pixelClustering::invalidModuleId, phase1PixelTopology::layerStart, pixelClustering::maxNumClustersPerModules, pixelClustering::maxNumModules, SiStripPI::min, callgraph::module, gpuClustering::moduleId, gpuClustering::newclusId, cms::alpakatools::next_valid_element_index_strided(), gpuClustering::nMaxModules, phase1PixelTopology::numberOfModules, gpuClustering::numElements, hltrates_dqm_sourceclient-live_cfg::offset, convertSQLiteXML::ok, cms::alpakatools::once_per_block(), gpuClustering::startBPIX2, and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

26  {
30 
31  const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
32  auto firstModule = blockIdx;
33  auto endModule = clus_view[0].moduleStart();
34  if (blockIdx >= endModule)
35  return;
36 
37  auto& charge = alpaka::declareSharedVar<int32_t[maxNumClustersPerModules], __COUNTER__>(acc);
38  auto& ok = alpaka::declareSharedVar<uint8_t[maxNumClustersPerModules], __COUNTER__>(acc);
39  auto& newclusId = alpaka::declareSharedVar<uint16_t[maxNumClustersPerModules], __COUNTER__>(acc);
40 
41  const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
42 
44  auto firstPixel = clus_view[1 + module].moduleStart();
45  auto thisModuleId = digi_view[firstPixel].moduleId();
46 
49 
50  uint32_t nclus = clus_view[thisModuleId].clusInModule();
51  if (nclus == 0)
52  return;
53 
55  printf("Warning too many clusters in module %d in block %d: %d > %d\n",
56  thisModuleId,
57  module,
58  nclus,
60 
61  // Stride = block size.
62  const uint32_t blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
63 
64  // Get thread / CPU element indices in block.
65  const auto& [firstElementIdxNoStride, endElementIdxNoStride] =
67 
68  if (nclus > maxNumClustersPerModules) {
69  uint32_t firstElementIdx = firstElementIdxNoStride;
70  uint32_t endElementIdx = endElementIdxNoStride;
71  // remove excess FIXME find a way to cut charge first....
72  for (uint32_t i = firstElementIdx; i < numElements; ++i) {
74  i, firstElementIdx, endElementIdx, blockDimension, numElements))
75  break;
76  if (digi_view[i].moduleId() == invalidModuleId)
77  continue; // not valid
78  if (digi_view[i].moduleId() != thisModuleId)
79  break; // end of module
80  if (digi_view[i].clus() >= maxNumClustersPerModules) {
81  digi_view[i].moduleId() = invalidModuleId;
82  digi_view[i].clus() = invalidModuleId;
83  }
84  }
86  }
87 
88 #ifdef GPU_DEBUG
89  if (thisModuleId % 100 == 1)
91  printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module);
92 #endif
93 
95  cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { charge[i] = 0; });
96  alpaka::syncBlockThreads(acc);
97 
98  uint32_t firstElementIdx = firstElementIdxNoStride;
99  uint32_t endElementIdx = endElementIdxNoStride;
100  for (uint32_t i = firstElementIdx; i < numElements; ++i) {
102  i, firstElementIdx, endElementIdx, blockDimension, numElements))
103  break;
104  if (digi_view[i].moduleId() == invalidModuleId)
105  continue; // not valid
106  if (digi_view[i].moduleId() != thisModuleId)
107  break; // end of module
108  alpaka::atomicAdd(acc,
109  &charge[digi_view[i].clus()],
110  static_cast<int32_t>(digi_view[i].adc()),
111  alpaka::hierarchy::Threads{});
112  }
113  alpaka::syncBlockThreads(acc);
114 
115  auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
116  bool allGood = true;
117 
118  cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) {
119  newclusId[i] = ok[i] = (charge[i] > chargeCut) ? 1 : 0;
120  if (ok[i] == 0)
121  allGood = allGood && false;
122 
123  // #ifdef GPU_DEBUG
124  // printf("module %d -> chargeCut = %d; cluster %d; charge = %d; ok = %s\n",thisModuleId, chargeCut,i,charge[i],ok[i] > 0 ? " -> good" : "-> cut");
125  // #endif
126  });
127  alpaka::syncBlockThreads(acc);
128 
129  // if all clusters above threshold do nothing
130  // if (allGood)
131  // continue;
132 
133  // renumber
134  auto& ws = alpaka::declareSharedVar<uint16_t[32], __COUNTER__>(acc);
135  constexpr uint32_t maxThreads = 1024;
136  auto minClust = std::min(nclus, maxThreads);
137 
139 
140  if constexpr (maxNumClustersPerModules > maxThreads) //only if needed
141  {
142  for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
144 
145  cms::alpakatools::for_each_element_in_block_strided(acc, nclus - offset, [&](uint32_t i) {
146  uint32_t prevBlockEnd = ((i + offset / maxThreads) * maxThreads) - 1;
147  newclusId[i] += newclusId[prevBlockEnd];
148  });
149  alpaka::syncBlockThreads(acc);
150  }
151  }
152 
153  ALPAKA_ASSERT_OFFLOAD(nclus >= newclusId[nclus - 1]);
154 
155  if (nclus == newclusId[nclus - 1])
156  return;
157 
158  clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1];
159  alpaka::syncBlockThreads(acc);
160 
161 #ifdef GPU_DEBUG
162  if (thisModuleId % 100 == 1)
164  printf("module %d -> chargeCut = %d; nclus (pre cut) = %d; nclus (after cut) = %d\n",
165  thisModuleId,
166  chargeCut,
167  nclus,
168  clus_view[thisModuleId].clusInModule());
169 #endif
170  // mark bad cluster again
171  cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) {
172  if (0 == ok[i])
173  newclusId[i] = invalidModuleId + 1;
174  });
175 
176  alpaka::syncBlockThreads(acc);
177 
178  // reassign id
179  firstElementIdx = firstElementIdxNoStride;
180  endElementIdx = endElementIdxNoStride;
181  for (uint32_t i = firstElementIdx; i < numElements; ++i) {
183  i, firstElementIdx, endElementIdx, blockDimension, numElements))
184  break;
185  if (digi_view[i].moduleId() == invalidModuleId)
186  continue; // not valid
187  if (digi_view[i].moduleId() != thisModuleId)
188  break; // end of module
189  if (0 == ok[digi_view[i].clus()])
190  digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId;
191  else
192  digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1;
193  // digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1;
194  // if (digi_view[i].clus() == invalidModuleId)
195  // digi_view[i].moduleId() = invalidModuleId;
196  }
197 
198  alpaka::syncBlockThreads(acc);
199 
200  //done
201  }
202  }
const uint32_t gridDimension(alpaka::getWorkDiv< alpaka::Grid, alpaka::Blocks >(acc)[0u])
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool next_valid_element_index_strided(Idx &i, Idx &firstElementIdx, Idx &endElementIdx, const Idx stride, const Idx maxNumberOfElements)
Definition: workdivision.h:921
constexpr int nMaxModules
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
Definition: workdivision.h:805
chargeCut
Definition: DMR_cfg.py:159
constexpr uint16_t numberOfModules
constexpr int32_t getThresholdForLayerOnCondition(bool isLayer1) const noexcept
ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc &acc, const Idx maxNumberOfElements, const Idx elementIdxShift, const Func func, const unsigned int dimIndex=0)
Definition: workdivision.h:936
constexpr int startBPIX2
const dim3 blockIdx
Definition: cudaCompat.h:32
ALPAKA_FN_ACC std::pair< Idx, Idx > element_index_range_in_block(const TAcc &acc, const Idx elementIdxShift, const unsigned int dimIndex=0u)
Definition: workdivision.h:818
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
__shared__ uint16_t newclusId[maxNumClustersPerModules]
constexpr int32_t maxNumClustersPerModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
static constexpr uint32_t layerStart[numberOfLayers+1]
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
constexpr uint16_t maxNumModules
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ adc