1 #ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h 2 #define RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h 9 #include <alpaka/alpaka.hpp> 22 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
23 ALPAKA_STATIC_ACC_MEM_GLOBAL uint32_t gMaxHit = 0;
26 namespace pixelStatus {
68 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
70 uint32_t* __restrict__
status,
76 uint32_t expected = old_word;
85 uint32_t new_word = old_word | (
static_cast<uint32_t
>(new_status) <<
shift);
87 }
while (expected != old_word);
92 template <
typename TrackerTraits>
94 template <
typename TAcc>
104 printf(
"Starting to count modules to set module starts:");
108 digi_view[
i].clus() =
i;
123 printf(
"> New module (no. %d) found at digi %d \n", loc,
i);
125 clus_view[loc + 1].moduleStart() =
i;
131 template <
typename TrackerTraits>
140 template <
typename TAcc>
147 auto& lastPixel = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
149 const uint32_t lastModule = clus_view[0].moduleStart();
151 auto firstPixel = clus_view[1 +
module].moduleStart();
152 uint32_t thisModuleId = digi_view[firstPixel].moduleId();
156 if (thisModuleId % 100 == 1)
158 printf(
"start clusterizer for module %4d in block %4d\n",
160 alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
165 alpaka::syncBlockThreads(acc);
169 auto id = digi_view[
i].moduleId();
174 if (
id != thisModuleId) {
181 TrackerTraits::clusterBinning,
182 TrackerTraits::maxPixInModule,
183 TrackerTraits::clusterBits,
185 #if defined(__HIP_DEVICE_COMPILE__) 186 constexpr auto warpSize = __AMDGCN_WAVEFRONT_SIZE;
190 auto&
hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
191 auto&
ws = alpaka::declareSharedVar<typename Hist::Counter[warpSize], __COUNTER__>(acc);
195 alpaka::syncBlockThreads(acc);
201 if (lastPixel - firstPixel > TrackerTraits::maxPixInModule) {
202 printf(
"too many pixels in module %u: %u > %u\n",
204 lastPixel - firstPixel,
205 TrackerTraits::maxPixInModule);
206 lastPixel = TrackerTraits::maxPixInModule + firstPixel;
209 alpaka::syncBlockThreads(acc);
213 auto& totGood = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
215 alpaka::syncBlockThreads(acc);
222 auto&
status = alpaka::declareSharedVar<uint32_t[pixelStatus::size], __COUNTER__>(acc);
228 alpaka::syncBlockThreads(acc);
236 alpaka::syncBlockThreads(acc);
244 digi_view[
i].rawIdArr() = 0;
247 alpaka::syncBlockThreads(acc);
255 hist.count(acc, digi_view[
i].
yy());
261 alpaka::syncBlockThreads(acc);
265 alpaka::syncBlockThreads(acc);
267 alpaka::syncBlockThreads(acc);
270 if (thisModuleId % 100 == 1)
272 printf(
"histo size %d\n",
hist.size());
277 hist.fill(acc, digi_view[
i].
yy(),
i - firstPixel);
283 auto& n40 = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
284 auto& n60 = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
289 alpaka::syncBlockThreads(acc);
291 if (
hist.size(
j) > 60)
293 if (
hist.size(
j) > 40)
296 alpaka::syncBlockThreads(acc);
299 printf(
"columns with more than 60 px %d in %d\n", n60, thisModuleId);
301 printf(
"columns with more than 40 px %d in %d\n", n40, thisModuleId);
303 alpaka::syncBlockThreads(acc);
306 [[maybe_unused]]
const uint32_t blockDimension = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
313 ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u] <= maxElements));
320 uint16_t
nn[maxIter][maxNeighbours];
321 uint8_t nnn[maxIter];
322 for (uint32_t
k = 0;
k < maxIter; ++
k) {
326 alpaka::syncBlockThreads(acc);
332 auto p =
hist.begin() +
j;
333 auto i = *
p + firstPixel;
340 for (;
p < end; ++
p) {
341 auto m = *
p + firstPixel;
345 if (
std::abs(
int(digi_view[
m].
xx()) -
int(digi_view[
i].
xx())) <= 1) {
362 while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc,
more)) {
371 auto p =
hist.begin() +
j;
372 auto i = *
p + firstPixel;
373 for (
int kk = 0;
kk < nnn[
k]; ++
kk) {
375 auto m =
l + firstPixel;
379 if (old != digi_view[
i].clus()) {
393 alpaka::syncBlockThreads(acc);
395 auto p =
hist.begin() +
j;
396 auto i = *
p + firstPixel;
397 auto m = digi_view[
i].clus();
398 while (
m != digi_view[
m].clus())
399 m = digi_view[
m].clus();
400 digi_view[
i].clus() =
m;
420 auto&
foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
422 alpaka::syncBlockThreads(acc);
430 if (digi_view[
i].clus() == static_cast<int>(
i)) {
432 digi_view[
i].clus() = -(old + 1);
435 alpaka::syncBlockThreads(acc);
442 if (digi_view[
i].clus() >= 0) {
444 digi_view[
i].clus() = digi_view[digi_view[
i].clus()].clus();
447 alpaka::syncBlockThreads(acc);
455 digi_view[
i].clus() = -digi_view[
i].clus() - 1;
458 alpaka::syncBlockThreads(acc);
462 clus_view[
module].moduleId() = thisModuleId;
469 if (thisModuleId % 100 == 1)
470 printf(
"%d clusters in module %d\n",
foundClusters, thisModuleId);
479 #endif // plugin_SiPixelClusterizer_alpaka_PixelClustering.h cms::alpakatools::HistoContainer< uint8_t, 256, 16000, 8, uint16_t > Hist
constexpr uint32_t pixelSizeY
constexpr uint32_t valuesPerWord
static constexpr uint16_t numColsInModule
constexpr uint32_t pixelSizeX
static constexpr uint16_t numRowsInModule
constexpr uint16_t numberOfModules
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelDigisSoAView digi_view, SiPixelClustersSoAView clus_view, const unsigned int numElements) const
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getIndex(uint16_t x, uint16_t y)
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr void promote(TAcc const &acc, uint32_t *__restrict__ status, const uint16_t x, const uint16_t y)
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getShift(uint16_t x, uint16_t y)
T1 atomicInc(T1 *a, T2 b)
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::vector< Block > Blocks
Abs< T >::type abs(const T &t)
static constexpr uint32_t maxElementsPerBlock
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelDigisSoAView digi_view, SiPixelClustersSoAView clus_view, const unsigned int numElements) const
static constexpr uint32_t maxIterGPU
static unsigned int const shift
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
constexpr int invalidClusterId
constexpr uint16_t maxNumModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr bool isDuplicate(uint32_t const *__restrict__ status, uint16_t x, uint16_t y)
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Status getStatus(uint32_t const *__restrict__ status, uint16_t x, uint16_t y)
T1 atomicMin(T1 *a, T2 b)
ALPAKA_ASSERT_ACC(offsets)
T1 atomicAdd(T1 *a, T2 b)
constexpr uint16_t invalidModuleId