
PixelRecHits.h
#ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
#define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

#include <alpaka/alpaka.hpp>

//#define GPU_DEBUG

namespace ALPAKA_ACCELERATOR_NAMESPACE {
  namespace pixelRecHits {

    template <typename TrackerTraits>
    class GetHits {
    public:
      template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
      ALPAKA_FN_ACC void operator()(const TAcc& acc,
                                    pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const* __restrict__ cpeParams,
                                    BeamSpotPOD const* __restrict__ bs,
                                    SiPixelDigisSoAConstView digis,
                                    uint32_t numElements,
                                    SiPixelClustersSoAConstView clusters,
                                    TrackingRecHitSoAView<TrackerTraits> hits) const {
        // FIXME
        // the compiler seems NOT to optimize loads from views (even in a simple test case)
        // The whole gymnastics of copying or not copying here is a pure heuristic exercise that seems to produce the fastest code with the above signature
        // not using views (passing a gazillion of array pointers) seems to produce the fastest code (but it is harder to maintain)
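        // Kernel outline: one block is assigned to one pixel module; the clusters of that module are
        // processed in batches, with per-cluster accumulators kept in shared memory, and the CPE then
        // converts each accumulated cluster into a hit position and error in local and global coordinates.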

        ALPAKA_ASSERT_OFFLOAD(cpeParams);

        const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);

        // copy average geometry corrected by beamspot. FIXME (move it somewhere else???)
        if (0 == blockIdx) {
          auto& agc = hits.averageGeometry();
          auto const& ag = cpeParams->averageGeometry();
          auto nLadders = TrackerTraits::numberOfLaddersInBarrel;

          cms::alpakatools::for_each_element_in_block_strided(acc, nLadders, [&](uint32_t il) {
            agc.ladderZ[il] = ag.ladderZ[il] - bs->z;
            agc.ladderX[il] = ag.ladderX[il] - bs->x;
            agc.ladderY[il] = ag.ladderY[il] - bs->y;
            agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]);
            agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z;
            agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z;
          });

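          // the endcap z positions are shifted by a single thread of the block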
          if (cms::alpakatools::once_per_block(acc)) {
            agc.endCapZ[0] = ag.endCapZ[0] - bs->z;
            agc.endCapZ[1] = ag.endCapZ[1] - bs->z;
          }
        }

        // to be moved in common namespace...
        using pixelClustering::invalidModuleId;
        constexpr int32_t MaxHitsInIter = pixelCPEforDevice::MaxHitsInIter;

        using ClusParams = pixelCPEforDevice::ClusParams;

        // as usual one block per module
        auto& clusParams = alpaka::declareSharedVar<ClusParams, __COUNTER__>(acc);
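        // clusParams lives in block-shared memory and holds the accumulators (row/column limits and
        // charges) for at most MaxHitsInIter clusters at a time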

        auto me = clusters[blockIdx].moduleId();
        int nclus = clusters[me].clusInModule();

        if (0 == nclus)
          return;
#ifdef GPU_DEBUG
        if (cms::alpakatools::once_per_block(acc)) {
          auto k = clusters[1 + blockIdx].moduleStart();
          while (digis[k].moduleId() == invalidModuleId)
            ++k;
          ALPAKA_ASSERT_OFFLOAD(digis[k].moduleId() == me);
        }

        if (me % 100 == 1)
          if (cms::alpakatools::once_per_block(acc))
            printf(
                "hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, clusters[me].clusModuleStart());
#endif

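        // the clusters of this module are processed in batches of at most MaxHitsInIter, so that the
        // fixed-size shared-memory buffer can be reused for modules with many clusters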
        for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) {
          auto first = clusters[1 + blockIdx].moduleStart();

          int nClusInIter = alpaka::math::min(acc, MaxHitsInIter, endClus - startClus);
          int lastClus = startClus + nClusInIter;
          assert(nClusInIter <= nclus);
          assert(nClusInIter > 0);
          assert(lastClus <= nclus);

          assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus));

          // init
          cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) {
            clusParams.minRow[ic] = std::numeric_limits<uint32_t>::max();
            clusParams.maxRow[ic] = 0;
            clusParams.minCol[ic] = std::numeric_limits<uint32_t>::max();
            clusParams.maxCol[ic] = 0;
            clusParams.charge[ic] = 0;
            clusParams.q_f_X[ic] = 0;
            clusParams.q_l_X[ic] = 0;
            clusParams.q_f_Y[ic] = 0;
            clusParams.q_l_Y[ic] = 0;
          });

          alpaka::syncBlockThreads(acc);

          // one thread per "digi"
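          // first pass over the digis of this module: find each cluster's bounding box
          // (minimum and maximum row and column) using block-level atomics on the shared accumulators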
          const uint32_t blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
          const auto& [firstElementIdxNoStride, endElementIdxNoStride] =
              cms::alpakatools::element_index_range_in_block(acc, first);
          uint32_t rowsColsFirstElementIdx = firstElementIdxNoStride;
          uint32_t rowsColsEndElementIdx = endElementIdxNoStride;
          for (uint32_t i = rowsColsFirstElementIdx; i < numElements; ++i) {
            if (not cms::alpakatools::next_valid_element_index_strided(
                    i, rowsColsFirstElementIdx, rowsColsEndElementIdx, blockDimension, numElements))
              break;
            auto id = digis[i].moduleId();
            if (id == invalidModuleId)
              continue;  // not valid
            if (id != me)
              break;  // end of module
            auto cl = digis[i].clus();
            if (cl < startClus || cl >= lastClus)
              continue;
            cl -= startClus;
            ALPAKA_ASSERT_OFFLOAD(cl >= 0);
            ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter);
            auto x = digis[i].xx();
            auto y = digis[i].yy();
            alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
            alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
            alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
            alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
          }

          alpaka::syncBlockThreads(acc);

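          // second pass: accumulate the total cluster charge and the charge collected in the first and
          // last row (q_f_X, q_l_X) and column (q_f_Y, q_l_Y); each digi's charge is clamped to pixmx
          // before entering the first/last sums, which feed the CPE position estimate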
          auto pixmx = cpeParams->detParams(me).pixmx;
          uint32_t chargeFirstElementIdx = firstElementIdxNoStride;
          uint32_t chargeEndElementIdx = endElementIdxNoStride;
          for (uint32_t i = chargeFirstElementIdx; i < numElements; ++i) {
            if (not cms::alpakatools::next_valid_element_index_strided(
                    i, chargeFirstElementIdx, chargeEndElementIdx, blockDimension, numElements))
              break;
            auto id = digis[i].moduleId();
            if (id == invalidModuleId)
              continue;  // not valid
            if (id != me)
              break;  // end of module
            auto cl = digis[i].clus();
            if (cl < startClus || cl >= lastClus)
              continue;
            cl -= startClus;
            ALPAKA_ASSERT_OFFLOAD(cl >= 0);
            ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter);
            auto x = digis[i].xx();
            auto y = digis[i].yy();
            auto ch = digis[i].adc();
            alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
            ch = alpaka::math::min(acc, ch, pixmx);
            if (clusParams.minRow[cl] == x)
              alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
            if (clusParams.maxRow[cl] == x)
              alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
            if (clusParams.minCol[cl] == y)
              alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
            if (clusParams.maxCol[cl] == y)
              alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
          }

          alpaka::syncBlockThreads(acc);

          // next one cluster per thread...
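          // each thread (element) finalizes one cluster of the batch: the CPE computes the local position
          // and error from the shared accumulators, and the result is written to the output hit SoA in
          // both local and beamspot-corrected global coordinates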
          first = clusters[me].clusModuleStart() + startClus;
          cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) {
            auto h = first + ic;  // output index in global memory

            assert(h < (uint32_t)hits.metadata().size());
            assert(h < clusters[me + 1].clusModuleStart());

            pixelCPEforDevice::position<TrackerTraits>(
                cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);

            pixelCPEforDevice::errorFromDB<TrackerTraits>(
                cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);

            // store it
            hits[h].chargeAndStatus().charge = clusParams.charge[ic];
            hits[h].chargeAndStatus().status = clusParams.status[ic];
            hits[h].detectorIndex() = me;

            float xl, yl;
            hits[h].xLocal() = xl = clusParams.xpos[ic];
            hits[h].yLocal() = yl = clusParams.ypos[ic];

            hits[h].clusterSizeX() = clusParams.xsize[ic];
            hits[h].clusterSizeY() = clusParams.ysize[ic];

            hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX;
            hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY;

            // keep it local for computations
            float xg, yg, zg;
            // to global and compute phi...
            cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg);
            // here correct for the beamspot...
            xg -= bs->x;
            yg -= bs->y;
            zg -= bs->z;

            hits[h].xGlobal() = xg;
            hits[h].yGlobal() = yg;
            hits[h].zGlobal() = zg;

            hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg);
            hits[h].iphi() = unsafe_atan2s<7>(yg, xg);
          });
          alpaka::syncBlockThreads(acc);
        }  // end loop on batches
      }
    };

  }  // namespace pixelRecHits
}  // namespace ALPAKA_ACCELERATOR_NAMESPACE

#endif  // RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
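For context, a minimal sketch of how a kernel such as GetHits is typically enqueued in the CMSSW Alpaka framework, with one block per pixel module as the kernel assumes. The wrapper name runGetHits, the thread count, and the nModules argument are assumptions made for this example; the actual launch code lives in the corresponding kernel wrapper, not in this header, and Queue, Acc1D and cms::alpakatools::make_workdiv are the usual HeterogeneousCore/AlpakaInterface helpers.

// Illustrative sketch only, assumed to live inside ALPAKA_ACCELERATOR_NAMESPACE
template <typename TrackerTraits>
void runGetHits(Queue& queue,
                pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const* cpeParams,
                BeamSpotPOD const* bs,
                SiPixelDigisSoAConstView digis,
                uint32_t nDigis,
                SiPixelClustersSoAConstView clusters,
                TrackingRecHitSoAView<TrackerTraits> hits,
                uint32_t nModules) {
  const uint32_t threadsPerBlock = 128;  // tuning choice, assumed for this example
  // one 1D block per module, matching the kernel's use of the block index as a module index
  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(nModules, threadsPerBlock);
  alpaka::exec<Acc1D>(
      queue, workDiv, pixelRecHits::GetHits<TrackerTraits>{}, cpeParams, bs, digis, nDigis, clusters, hits);
}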