1 #ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h 2 #define RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h 9 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
23 ALPAKA_STATIC_ACC_MEM_GLOBAL uint32_t gMaxHit = 0;
26 namespace pixelStatus {
68 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
70 uint32_t* __restrict__
status,
76 uint32_t expected = old_word;
85 uint32_t new_word = old_word | (
static_cast<uint32_t
>(new_status) <<
shift);
87 }
while (expected != old_word);
92 template <
typename TrackerTraits>
94 template <
typename TAcc>
104 printf(
"Starting to count modules to set module starts:");
108 digi_view[
i].clus() =
i;
123 printf(
"> New module (no. %d) found at digi %d \n", loc,
i);
125 clus_view[loc + 1].moduleStart() =
i;
131 template <
typename TrackerTraits>
140 template <
typename TAcc>
147 auto& lastPixel = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
149 const uint32_t lastModule = clus_view[0].moduleStart();
151 auto firstPixel = clus_view[1 +
module].moduleStart();
152 uint32_t thisModuleId = digi_view[firstPixel].moduleId();
156 if (thisModuleId % 100 == 1)
158 printf(
"start clusterizer for module %4d in block %4d\n",
160 alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
165 alpaka::syncBlockThreads(acc);
169 auto id = digi_view[
i].moduleId();
174 if (
id != thisModuleId) {
181 TrackerTraits::clusterBinning,
182 TrackerTraits::maxPixInModule,
183 TrackerTraits::clusterBits,
185 auto&
hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
186 auto&
ws = alpaka::declareSharedVar<typename Hist::Counter[32], __COUNTER__>(acc);
190 alpaka::syncBlockThreads(acc);
196 if (lastPixel - firstPixel > TrackerTraits::maxPixInModule) {
197 printf(
"too many pixels in module %u: %u > %u\n",
199 lastPixel - firstPixel,
200 TrackerTraits::maxPixInModule);
201 lastPixel = TrackerTraits::maxPixInModule + firstPixel;
204 alpaka::syncBlockThreads(acc);
208 auto& totGood = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
210 alpaka::syncBlockThreads(acc);
217 auto&
status = alpaka::declareSharedVar<uint32_t[pixelStatus::size], __COUNTER__>(acc);
223 alpaka::syncBlockThreads(acc);
231 alpaka::syncBlockThreads(acc);
239 digi_view[
i].rawIdArr() = 0;
242 alpaka::syncBlockThreads(acc);
250 hist.count(acc, digi_view[
i].
yy());
256 alpaka::syncBlockThreads(acc);
260 alpaka::syncBlockThreads(acc);
262 alpaka::syncBlockThreads(acc);
265 if (thisModuleId % 100 == 1)
267 printf(
"histo size %d\n",
hist.size());
272 hist.fill(acc, digi_view[
i].
yy(),
i - firstPixel);
278 auto& n40 = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
279 auto& n60 = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
284 alpaka::syncBlockThreads(acc);
286 if (
hist.size(
j) > 60)
288 if (
hist.size(
j) > 40)
291 alpaka::syncBlockThreads(acc);
294 printf(
"columns with more than 60 px %d in %d\n", n60, thisModuleId);
296 printf(
"columns with more than 40 px %d in %d\n", n40, thisModuleId);
298 alpaka::syncBlockThreads(acc);
301 [[maybe_unused]]
const uint32_t blockDimension = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
308 ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u] <= maxElements));
315 uint16_t
nn[maxIter][maxNeighbours];
316 uint8_t nnn[maxIter];
317 for (uint32_t
k = 0;
k < maxIter; ++
k) {
321 alpaka::syncBlockThreads(acc);
327 auto p =
hist.begin() +
j;
328 auto i = *
p + firstPixel;
335 for (;
p < end; ++
p) {
336 auto m = *
p + firstPixel;
340 if (
std::abs(
int(digi_view[
m].
xx()) -
int(digi_view[
i].
xx())) <= 1) {
357 while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc,
more)) {
366 auto p =
hist.begin() +
j;
367 auto i = *
p + firstPixel;
368 for (
int kk = 0;
kk < nnn[
k]; ++
kk) {
370 auto m =
l + firstPixel;
374 if (old != digi_view[
i].clus()) {
388 alpaka::syncBlockThreads(acc);
390 auto p =
hist.begin() +
j;
391 auto i = *
p + firstPixel;
392 auto m = digi_view[
i].clus();
393 while (
m != digi_view[
m].clus())
394 m = digi_view[
m].clus();
395 digi_view[
i].clus() =
m;
415 auto&
foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
417 alpaka::syncBlockThreads(acc);
425 if (digi_view[
i].clus() == static_cast<int>(
i)) {
427 digi_view[
i].clus() = -(old + 1);
430 alpaka::syncBlockThreads(acc);
437 if (digi_view[
i].clus() >= 0) {
439 digi_view[
i].clus() = digi_view[digi_view[
i].clus()].clus();
442 alpaka::syncBlockThreads(acc);
450 digi_view[
i].clus() = -digi_view[
i].clus() - 1;
453 alpaka::syncBlockThreads(acc);
457 clus_view[
module].moduleId() = thisModuleId;
464 if (thisModuleId % 100 == 1)
465 printf(
"%d clusters in module %d\n",
foundClusters, thisModuleId);
474 #endif // plugin_SiPixelClusterizer_alpaka_PixelClustering.h cms::alpakatools::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
constexpr uint32_t pixelSizeY
constexpr uint32_t valuesPerWord
static constexpr uint16_t numColsInModule
constexpr uint32_t pixelSizeX
static constexpr uint16_t numRowsInModule
constexpr uint16_t numberOfModules
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelDigisSoAView digi_view, SiPixelClustersSoAView clus_view, const unsigned int numElements) const
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getIndex(uint16_t x, uint16_t y)
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr void promote(TAcc const &acc, uint32_t *__restrict__ status, const uint16_t x, const uint16_t y)
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getShift(uint16_t x, uint16_t y)
T1 atomicInc(T1 *a, T2 b)
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::vector< Block > Blocks
Abs< T >::type abs(const T &t)
static constexpr uint32_t maxElementsPerBlock
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelDigisSoAView digi_view, SiPixelClustersSoAView clus_view, const unsigned int numElements) const
static constexpr uint32_t maxIterGPU
static unsigned int const shift
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
constexpr int invalidClusterId
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr bool isDuplicate(uint32_t const *__restrict__ status, uint16_t x, uint16_t y)
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Status getStatus(uint32_t const *__restrict__ status, uint16_t x, uint16_t y)
T1 atomicMin(T1 *a, T2 b)
ALPAKA_ASSERT_ACC(offsets)
T1 atomicAdd(T1 *a, T2 b)
constexpr uint16_t invalidModuleId