CMS 3D CMS Logo

SiPixelRawToClusterKernel.dev.cc
Go to the documentation of this file.
1 // C++ includes
2 #include <algorithm>
3 #include <cassert>
4 #include <cstdint>
5 #include <cstdio>
6 #include <type_traits>
7 
8 // Alpaka includes
9 #include <alpaka/alpaka.hpp>
10 
11 // CMSSW includes
31 
32 // local includes
33 #include "CalibPixel.h"
34 #include "ClusterChargeCut.h"
35 #include "PixelClustering.h"
37 
38 // #define GPU_DEBUG
39 
41  namespace pixelDetails {
42 
43  ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
45  }
46 
47  ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
48  uint8_t fed,
49  uint32_t link,
50  uint32_t roc) {
51  using namespace ::pixelDetails;
52  uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
53  DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
54  return detId;
55  }
56 
57  //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
58  //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
59  // Convert local pixel to pixelDetails::global pixel
60  ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
61  bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
62  int slopeRow = 0, slopeCol = 0;
63  int rowOffset = 0, colOffset = 0;
64 
65  if (bpix) {
66  if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
67  if (rocIdInDetUnit < 8) {
68  slopeRow = 1;
69  slopeCol = -1;
70  rowOffset = 0;
71  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
72  } else {
73  slopeRow = -1;
74  slopeCol = 1;
75  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
76  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
77  } // if roc
78  } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
79  if (rocIdInDetUnit < 8) {
80  slopeRow = -1;
81  slopeCol = 1;
82  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
83  colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
84  } else {
85  slopeRow = 1;
86  slopeCol = -1;
87  rowOffset = 0;
88  colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
89  }
90  }
91 
92  } else { // fpix
93  if (side == -1) { // pannel 1
94  if (rocIdInDetUnit < 8) {
95  slopeRow = 1;
96  slopeCol = -1;
97  rowOffset = 0;
98  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
99  } else {
100  slopeRow = -1;
101  slopeCol = 1;
102  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
103  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
104  }
105  } else { // pannel 2
106  if (rocIdInDetUnit < 8) {
107  slopeRow = 1;
108  slopeCol = -1;
109  rowOffset = 0;
110  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
111  } else {
112  slopeRow = -1;
113  slopeCol = 1;
114  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
115  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
116  }
117 
118  } // side
119  }
120 
121  uint32_t gRow = rowOffset + slopeRow * local.row;
122  uint32_t gCol = colOffset + slopeCol * local.col;
123  // inside frameConversion row: gRow, column: gCol
124  ::pixelDetails::Pixel global = {gRow, gCol};
125  return global;
126  }
127 
128  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
129  template <bool debug = false>
130  ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
131  uint8_t errorType = 0;
132 
133  switch (status) {
134  case 1: {
135  if constexpr (debug)
136  printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
137  errorType = 35;
138  break;
139  }
140  case 2: {
141  if constexpr (debug)
142  printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
143  errorType = 36;
144  break;
145  }
146  case 3: {
147  if constexpr (debug)
148  printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
149  errorType = 37;
150  break;
151  }
152  case 4: {
153  if constexpr (debug)
154  printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
155  errorType = 38;
156  break;
157  }
158  default:
159  if constexpr (debug)
160  printf("Cabling check returned unexpected result, status = %i\n", status);
161  };
162 
163  return errorType;
164  }
165 
166  ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
168  return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
169  }
170 
171  ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
172 
173  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
174  template <bool debug = false>
175  ALPAKA_FN_ACC uint8_t
176  checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
177  uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
178  if (errorType < 25)
179  return 0;
180  bool errorFound = false;
181 
182  switch (errorType) {
183  case 25: {
184  errorFound = true;
185  uint32_t index =
187  if (index > 1 && index <= cablingMap.size()) {
188  if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
189  errorFound = false;
190  }
191  if constexpr (debug)
192  if (errorFound)
193  printf("Invalid ROC = 25 found (errorType = 25)\n");
194  break;
195  }
196  case 26: {
197  if constexpr (debug)
198  printf("Gap word found (errorType = 26)\n");
199  break;
200  }
201  case 27: {
202  if constexpr (debug)
203  printf("Dummy word found (errorType = 27)\n");
204  break;
205  }
206  case 28: {
207  if constexpr (debug)
208  printf("Error fifo nearly full (errorType = 28)\n");
209  errorFound = true;
210  break;
211  }
212  case 29: {
213  if constexpr (debug)
214  printf("Timeout on a channel (errorType = 29)\n");
216  if constexpr (debug)
217  printf("...2nd errorType=29 error, skip\n");
218  break;
219  }
220  errorFound = true;
221  break;
222  }
223  case 30: {
224  if constexpr (debug)
225  printf("TBM error trailer (errorType = 30)\n");
226  int stateMatch_bits = 4;
227  int stateMatch_shift = 8;
228  uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
229  int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
230  if (stateMatch != 1 && stateMatch != 8) {
231  if constexpr (debug)
232  printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
233  break;
234  }
235  if (stateMatch == 1)
236  errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30
237  errorFound = true;
238  break;
239  }
240  case 31: {
241  if constexpr (debug)
242  printf("Event number error (errorType = 31)\n");
243  errorFound = true;
244  break;
245  }
246  default:
247  errorFound = false;
248  };
249 
250  return errorFound ? errorType : 0;
251  }
252 
253  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
254  template <bool debug = false>
255  ALPAKA_FN_ACC uint32_t
256  getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
257  uint32_t rID = 0xffffffff;
258 
259  switch (errorType) {
260  case 25:
261  case 29:
262  case 30:
263  case 31:
264  case 36:
265  case 40: {
266  uint32_t roc = 1;
267  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
268  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
269  if (rID_temp != ::pixelClustering::invalidModuleId)
270  rID = rID_temp;
271  break;
272  }
273  case 37:
274  case 38: {
275  uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
276  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
277  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
278  if (rID_temp != ::pixelClustering::invalidModuleId)
279  rID = rID_temp;
280  break;
281  }
282  default:
283  break;
284  };
285 
286  return rID;
287  }
288 
289  // Kernel to perform Raw to Digi conversion
290  template <bool debug = false>
292  template <typename TAcc>
293  ALPAKA_FN_ACC void operator()(const TAcc &acc,
294  const SiPixelMappingSoAConstView &cablingMap,
295  const unsigned char *modToUnp,
296  const uint32_t wordCounter,
297  const uint32_t *word,
298  const uint8_t *fedIds,
299  SiPixelDigisSoAView digisView,
301  bool useQualityInfo,
302  bool includeErrors) const {
303  // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
305  err.size() = 0;
306 
307  for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
308  auto dvgi = digisView[gIndex];
309  dvgi.xx() = 0;
310  dvgi.yy() = 0;
311  dvgi.adc() = 0;
312 
313  // initialise the errors
314  err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
315 
316  uint8_t fedId = fedIds[gIndex / 2]; // +1200;
317 
318  // initialize (too many coninue below)
319  dvgi.pdigi() = 0;
320  dvgi.rawIdArr() = 0;
321  dvgi.moduleId() = ::pixelClustering::invalidModuleId;
322 
323  uint32_t ww = word[gIndex]; // Array containing 32 bit raw data
324  if (ww == 0) {
325  // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
326  continue;
327  }
328 
329  uint32_t link = sipixelconstants::getLink(ww); // Extract link
330  uint32_t roc = sipixelconstants::getROC(ww); // Extract ROC in link
331 
332  uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
333  bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
334  if (includeErrors and skipROC) {
335  uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
336  if (rawId != 0xffffffff) // Store errors only for valid DetIds
337  {
338  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
339  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
340  }
341  continue;
342  }
343 
344  // Check for spurious channels
346  uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
347  if constexpr (debug) {
348  printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
349  }
350  if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
351  uint8_t error = conversionError<debug>(fedId, 2);
352  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
353  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
354  }
355  continue;
356  }
357 
358  uint32_t index =
360  if (useQualityInfo) {
361  skipROC = cablingMap.badRocs()[index];
362  if (skipROC)
363  continue;
364  }
365  skipROC = modToUnp[index];
366  if (skipROC)
367  continue;
368 
369  ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
370  uint32_t rawId = detId.rawId;
371  uint32_t layer = 0;
372  int side = 0, panel = 0, module = 0;
373  bool barrel = isBarrel(rawId);
374 
375  if (barrel) {
378  side = (module < 5) ? -1 : 1;
379  } else {
380  // endcap ids
381  layer = 0;
383  side = (panel == 1) ? -1 : 1;
384  }
385 
386  ::pixelDetails::Pixel localPix;
387  if (layer == 1) {
388  // Special case of barrel layer 1
389  uint32_t col = sipixelconstants::getCol(ww);
390  uint32_t row = sipixelconstants::getRow(ww);
391  localPix.row = row;
392  localPix.col = col;
393  if (includeErrors and not rocRowColIsValid(row, col)) {
394  uint8_t error = conversionError<debug>(fedId, 3);
395  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
396  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
397  if constexpr (debug)
398  printf("BPIX1 Error status: %i\n", error);
399  continue;
400  }
401  } else {
402  // Other layers with double columns
403  uint32_t dcol = sipixelconstants::getDCol(ww);
404  uint32_t pxid = sipixelconstants::getPxId(ww);
405  uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
406  uint32_t col = dcol * 2 + pxid % 2;
407  localPix.row = row;
408  localPix.col = col;
409  if (includeErrors and not dcolIsValid(dcol, pxid)) {
410  uint8_t error = conversionError<debug>(fedId, 3);
411  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
412  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
413  if constexpr (debug)
414  printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
415  continue;
416  }
417  }
418 
419  ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
420  dvgi.xx() = globalPix.row; // origin shifting by 1 0-159
421  dvgi.yy() = globalPix.col; // origin shifting by 1 0-415
422  dvgi.adc() = sipixelconstants::getADC(ww);
423  dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
424  dvgi.moduleId() = detId.moduleId;
425  dvgi.rawIdArr() = rawId;
426  } // end of stride on grid
427 
428  } // end of Raw to Digi kernel operator()
429  }; // end of Raw to Digi struct
430 
431  template <typename TrackerTraits>
433  template <typename TAcc>
434  ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
436 
437  // For Phase1 there are 1856 pixel modules
438  // For Phase2 there are 3872 pixel modules
439  // For whichever setup with more modules it would be
440  // easy to extend at least till 32*1024
441 
442  constexpr uint16_t prefixScanUpperLimit = isPhase2 ? 4096 : 2048;
443  ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < prefixScanUpperLimit);
444 
447 
448 #ifndef NDEBUG
449  [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
450  ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
451  [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
452  ALPAKA_ASSERT_ACC(1 == gridDimension);
453 #endif
454 
455  // limit to maxHitsInModule;
457  clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
458  }
459 
460  constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;
461 
462  auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
463 
465  acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
466 
468  acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
469 
470  if constexpr (isPhase2) {
472  acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
474  clus_view.clusModuleStart() + 3072 + 1,
475  clus_view.clusModuleStart() + 3072 + 1,
476  numberOfModules - 3072,
477  ws);
478  }
479 
480  constexpr auto lastModule = isPhase2 ? 2049u : numberOfModules + 1;
481  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 1025u, lastModule)) {
482  clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
483  }
484  alpaka::syncBlockThreads(acc);
485 
486  if constexpr (isPhase2) {
487  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 2049u, 3073u)) {
488  clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
489  }
490  alpaka::syncBlockThreads(acc);
491 
492  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 3073u, numberOfModules + 1)) {
493  clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
494  }
495  alpaka::syncBlockThreads(acc);
496  }
497 #ifdef GPU_DEBUG
498  ALPAKA_ASSERT_ACC(0 == clus_view[1].moduleStart());
499  auto c0 = std::min(maxHitsInModule, clus_view[2].clusModuleStart());
500  ALPAKA_ASSERT_ACC(c0 == clus_view[2].moduleStart());
501  ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
502  ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
503  ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart());
504 
506  if (0 != i)
507  ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
508  // Check BPX2 (1), FP1 (4)
509  constexpr auto bpix2 = TrackerTraits::layerStart[1];
510  constexpr auto fpix1 = TrackerTraits::layerStart[4];
511  if (i == bpix2 || i == fpix1)
512  printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
513  }
514 
515 #endif
516 
517  } // end of FillHitsModuleStart kernel operator()
518  }; // end of FillHitsModuleStart struct
519 
520  // Interface to outside
521  template <typename TrackerTraits>
523  Queue &queue,
524  const SiPixelClusterThresholds clusterThresholds,
525  const SiPixelMappingSoAConstView &cablingMap,
526  const unsigned char *modToUnp,
528  const WordFedAppender &wordFed,
529  const uint32_t wordCounter,
530  const uint32_t fedCounter,
531  bool useQualityInfo,
532  bool includeErrors,
533  bool debug) {
534  nDigis = wordCounter;
535 
536 #ifdef GPU_DEBUG
537  std::cout << "decoding " << wordCounter << " digis." << std::endl;
538 #endif
540  digis_d = SiPixelDigisSoACollection(wordCounter, queue);
541  if (includeErrors) {
542  digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
543  }
545  // protect in case of empty event....
546  if (wordCounter) {
547  const int threadsPerBlockOrElementsPerThread =
548  cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
549  // fill it all
550  const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
551  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
552  assert(0 == wordCounter % 2);
553  // wordCounter is the total no of words in each event to be trasfered on device
554  auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
555  // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
556  // However, only the first half of elements end up eventually used:
557  // hence, here, only wordCounter/2 elements are allocated.
558  auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
559  alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
560  alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
561  // Launch rawToDigi kernel
562  if (debug) {
563  alpaka::exec<Acc1D>(queue,
564  workDiv,
566  cablingMap,
567  modToUnp,
568  wordCounter,
569  word_d.data(),
570  fedId_d.data(),
571  digis_d->view(),
572  digiErrors_d->view(),
573  useQualityInfo,
574  includeErrors);
575  } else {
576  alpaka::exec<Acc1D>(queue,
577  workDiv,
579  cablingMap,
580  modToUnp,
581  wordCounter,
582  word_d.data(),
583  fedId_d.data(),
584  digis_d->view(),
585  digiErrors_d->view(),
586  useQualityInfo,
587  includeErrors);
588  }
589 
590 #ifdef GPU_DEBUG
592  std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
593 #endif
594  }
595  // End of Raw2Digi and passing data for clustering
596 
597  {
598  // clusterizer
599  using namespace pixelClustering;
600  // calibrations
601  using namespace calibPixel;
602  const int threadsPerBlockOrElementsPerThread = []() {
603  if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
604  // NB: MPORTANT: This could be tuned to benefit from innermost loop.
605  return 32;
606  } else {
607  return 256;
608  }
609  }();
610  const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
611  threadsPerBlockOrElementsPerThread);
612  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
613 
614  if (debug) {
615  alpaka::exec<Acc1D>(queue,
616  workDiv,
618  clusterThresholds,
619  digis_d->view(),
620  clusters_d->view(),
621  gains,
622  wordCounter);
623  } else {
624  alpaka::exec<Acc1D>(queue,
625  workDiv,
627  clusterThresholds,
628  digis_d->view(),
629  clusters_d->view(),
630  gains,
631  wordCounter);
632  }
633 #ifdef GPU_DEBUG
635  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
636  << " threadsPerBlockOrElementsPerThread\n";
637 #endif
638 
639  alpaka::exec<Acc1D>(
640  queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
641 
642  auto moduleStartFirstElement =
643  cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
644  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
645 
646  const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
647  const auto workDivMaxNumModules =
648  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
649 #ifdef GPU_DEBUG
650  std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
651  << " threadsPerBlockOrElementsPerThread\n";
652 #endif
653  alpaka::exec<Acc1D>(
654  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
655 #ifdef GPU_DEBUG
657 #endif
658 
659  constexpr auto threadsPerBlockChargeCut = 256;
660  const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
661  // apply charge cut
662  alpaka::exec<Acc1D>(queue,
663  workDivChargeCut,
665  digis_d->view(),
666  clusters_d->view(),
667  clusterThresholds,
668  wordCounter);
669  // count the module start indices already here (instead of
670  // rechits) so that the number of clusters/hits can be made
671  // available in the rechit producer without additional points of
672  // synchronization/ExternalWork
673 
674  // MUST be ONE block
675  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
676  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
677 
678  // last element holds the number of all clusters
679  const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
680  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
682 
683  // element startBPIX2 hold the number of clusters until BPIX2
684  const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
685  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
686  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
687  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
688 
689  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
690  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
691 
692 #ifdef GPU_DEBUG
694  std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
695  << " > no. of digis: " << nDigis << std::endl
696  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
697  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
698  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
699 #endif
700 
701  } // end clusterizer scope
702  }
703 
704  template <typename TrackerTraits>
706  Queue &queue,
707  const SiPixelClusterThresholds clusterThresholds,
708  SiPixelDigisSoAView &digis_view,
709  const uint32_t numDigis) {
710  using namespace pixelClustering;
711  using pixelTopology::Phase2;
712  nDigis = numDigis;
715  const auto threadsPerBlockOrElementsPerThread = 512;
716  const auto blocks =
717  cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
718  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
719 
720  alpaka::exec<Acc1D>(
721  queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
722 
723 #ifdef GPU_DEBUG
725  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
726  << " threadsPerBlockOrElementsPerThread\n";
727 #endif
728  alpaka::exec<Acc1D>(
729  queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
730 
731  auto moduleStartFirstElement =
732  cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
733  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
734 
735  const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
736  const auto workDivMaxNumModules =
737  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
738 #ifdef GPU_DEBUG
740  std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
741  << " threadsPerBlockOrElementsPerThread\n";
742 #endif
743  alpaka::exec<Acc1D>(
744  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
745 #ifdef GPU_DEBUG
747 #endif
748 
749  // apply charge cut
750  alpaka::exec<Acc1D>(queue,
751  workDivMaxNumModules,
753  digis_view,
754  clusters_d->view(),
755  clusterThresholds,
756  numDigis);
757 
758  // count the module start indices already here (instead of
759  // rechits) so that the number of clusters/hits can be made
760  // available in the rechit producer without additional points of
761  // synchronization/ExternalWork
762 
763  // MUST be ONE block
764  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
765  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
766 
767  // last element holds the number of all clusters
768  const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
769  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
771  // element startBPIX2 hold the number of clusters until BPIX2
772  const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
773  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
774  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
775  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
776 
777  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
778  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
779 
780 #ifdef GPU_DEBUG
782  std::cout << "SiPixelPhase2DigiToCluster: results \n"
783  << " > no. of digis: " << numDigis << std::endl
784  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
785  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
786  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
787 #endif
788  } //
789 
793 
794  } // namespace pixelDetails
795 
796 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
def pack(high, low)
void makePhase1ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const SiPixelGainCalibrationForHLTSoAConstView &gains, const WordFedAppender &wordFed, const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, bool debug)
constexpr uint32_t moduleStartBit
constexpr uint32_t getRow(uint32_t ww)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
constexpr uint32_t ERROR_mask
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
constexpr uint32_t getCol(uint32_t ww)
constexpr auto MAX_LINK
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
constexpr uint32_t layerMask
constexpr uint32_t numRowsInRoc
constexpr uint32_t ROC_shift
static const int kSubdetOffset
Definition: DetId.h:22
std::enable_if_t< not std::is_array_v< T >, device_view< TDev, T > > make_device_view(TDev const &device, T &data)
Definition: memory.h:260
assert(be >=bs)
static constexpr uint16_t numberOfModules
void makePhase2ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, SiPixelDigisSoAView &digis_view, const uint32_t numDigis)
constexpr uint16_t numberOfModules
ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap)
constexpr uint32_t panelMask
ALPAKA_FN_ACC void operator()(const TAcc &acc, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, SiPixelDigisSoAView digisView, SiPixelDigiErrorsSoAView err, bool useQualityInfo, bool includeErrors) const
constexpr uint32_t getPxId(uint32_t ww)
uint64_t word
static constexpr uint32_t const * layerStart
ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local)
constexpr int startBPIX2
static const int kSubdetMask
Definition: DetId.h:20
ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid)
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
constexpr uint32_t getDCol(uint32_t ww)
constexpr uint32_t OMIT_ERR_shift
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigisHost, SiPixelDigisDevice< Device > > SiPixelDigisSoACollection
constexpr uint32_t getADC(uint32_t ww)
std::vector< Block > Blocks
Definition: Block.h:99
constexpr uint32_t getLink(uint32_t ww)
constexpr unsigned int MAX_ROC
constexpr uint32_t ROC_mask
constexpr uint32_t moduleMask
constexpr uint32_t maxROCIndex
constexpr uint16_t invalidModuleId
ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, uint8_t fed, uint32_t link, uint32_t roc)
constexpr uint32_t getROC(uint32_t ww)
constexpr auto MAX_ROC
constexpr float gains[NGAINS]
Definition: EcalConstants.h:20
ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap)
#define debug
Definition: HDRShower.cc:19
constexpr uint32_t LINK_shift
constexpr uint32_t panelStartBit
ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol)
constexpr uint32_t layerStartBit
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
ALPAKA_FN_ACC constexpr bool once_per_grid(TAcc const &acc)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
static constexpr uint32_t layerStart[numberOfLayers+1]
constexpr uint32_t OMIT_ERR_mask
col
Definition: cuy.py:1009
ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status)
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigiErrorsHost, SiPixelDigiErrorsDevice< Device > > SiPixelDigiErrorsSoACollection
constexpr uint32_t numColsInRoc
std::enable_if_t< not std::is_array_v< T >, host_view< T > > make_host_view(T &data)
Definition: memory.h:153
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const
constexpr uint32_t LINK_mask
constexpr unsigned int MAX_LINK
constexpr uint16_t invalidModuleId
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelClustersHost, SiPixelClustersDevice< Device > > SiPixelClustersSoACollection
constexpr uint32_t maxHitsInModule()