CMS 3D CMS Logo

SiPixelRawToClusterKernel.dev.cc
Go to the documentation of this file.
1 // C++ includes
2 #include <algorithm>
3 #include <cassert>
4 #include <cstdint>
5 #include <cstdio>
6 #include <type_traits>
7 
8 // Alpaka includes
9 #include <alpaka/alpaka.hpp>
10 
11 // CMSSW includes
31 
32 // local includes
33 #include "CalibPixel.h"
34 #include "ClusterChargeCut.h"
35 #include "PixelClustering.h"
37 
38 // #define GPU_DEBUG
39 
41  namespace pixelDetails {
42 
// Predicate on a detector rawId.
// NOTE(review): listing line 44 (the whole function body) is missing from this extract, so the
// test itself is not visible here. Presumably it inspects the sub-detector field of rawId to
// tell barrel modules from endcap ones — confirm against the original source.
43  ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
45  }
46 
47  ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
48  uint8_t fed,
49  uint32_t link,
50  uint32_t roc) {
51  using namespace ::pixelDetails;
52  uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
53  DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
54  return detId;
55  }
56 
57  //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
58  //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
59  // Convert local pixel to pixelDetails::global pixel
60  ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
61  bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
62  int slopeRow = 0, slopeCol = 0;
63  int rowOffset = 0, colOffset = 0;
64 
65  if (bpix) {
66  if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
67  if (rocIdInDetUnit < 8) {
68  slopeRow = 1;
69  slopeCol = -1;
70  rowOffset = 0;
71  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
72  } else {
73  slopeRow = -1;
74  slopeCol = 1;
75  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
76  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
77  } // if roc
78  } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
79  if (rocIdInDetUnit < 8) {
80  slopeRow = -1;
81  slopeCol = 1;
82  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
83  colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
84  } else {
85  slopeRow = 1;
86  slopeCol = -1;
87  rowOffset = 0;
88  colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
89  }
90  }
91 
92  } else { // fpix
93  if (side == -1) { // pannel 1
94  if (rocIdInDetUnit < 8) {
95  slopeRow = 1;
96  slopeCol = -1;
97  rowOffset = 0;
98  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
99  } else {
100  slopeRow = -1;
101  slopeCol = 1;
102  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
103  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
104  }
105  } else { // pannel 2
106  if (rocIdInDetUnit < 8) {
107  slopeRow = 1;
108  slopeCol = -1;
109  rowOffset = 0;
110  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
111  } else {
112  slopeRow = -1;
113  slopeCol = 1;
114  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
115  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
116  }
117 
118  } // side
119  }
120 
121  uint32_t gRow = rowOffset + slopeRow * local.row;
122  uint32_t gCol = colOffset + slopeCol * local.col;
123  // inside frameConversion row: gRow, column: gCol
124  ::pixelDetails::Pixel global = {gRow, gCol};
125  return global;
126  }
127 
128  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
129  template <bool debug = false>
130  ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
131  uint8_t errorType = 0;
132 
133  switch (status) {
134  case 1: {
135  if constexpr (debug)
136  printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
137  errorType = 35;
138  break;
139  }
140  case 2: {
141  if constexpr (debug)
142  printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
143  errorType = 36;
144  break;
145  }
146  case 3: {
147  if constexpr (debug)
148  printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
149  errorType = 37;
150  break;
151  }
152  case 4: {
153  if constexpr (debug)
154  printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
155  errorType = 38;
156  break;
157  }
158  default:
159  if constexpr (debug)
160  printf("Cabling check returned unexpected result, status = %i\n", status);
161  };
162 
163  return errorType;
164  }
165 
166  ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
168  return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
169  }
170 
171  ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
172 
// Decide whether a raw 32-bit FED word is an error word that has to be reported.
//
// The candidate error type is read from errorWord by shifting with ::pixelDetails::ROC_shift and
// masking with ::pixelDetails::ERROR_mask. Values below 25 are not treated as errors and give 0.
// For the remaining values the switch decides whether the error is reported:
//   25      reported, unless `index` is in range and the cabling-map entry there does not match
//           (link, roc == 1)
//   26, 27  gap / dummy words: never reported
//   28, 31  always reported
//   29      timeout (part of this case is missing from the extract, see the note there)
//   30      reported only when the 4 state bits starting at bit 8 equal 1 or 8; state 1 is
//           remapped to error type 40
//   other   not reported
//
// debug      - template flag; when true a diagnostic line is printed in most cases.
// errorWord  - the raw word to classify.
// fedId      - not used in the lines visible here (listing line 186 is missing).
// link       - link number compared with the cabling map in case 25.
// cablingMap - cabling map view; its link(), roc() and size() accessors are read in case 25.
//
// Returns the (possibly remapped) error type when the error is reported, 0 otherwise.
173  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
174  template <bool debug = false>
175  ALPAKA_FN_ACC uint8_t
176  checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
177  uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
178  if (errorType < 25)
179  return 0;
180  bool errorFound = false;
181 
182  switch (errorType) {
183  case 25: {
184  errorFound = true;
    // NOTE(review): listing line 186 (the initialiser of index) is missing from this extract.
185  uint32_t index =
    // NOTE(review): the upper bound is inclusive (index <= size()); confirm that the map columns
    // really hold an entry at index == size(), otherwise this reads one element past the end.
187  if (index > 1 && index <= cablingMap.size()) {
    // the error is dropped when the map entry is not (this link, ROC 1)
188  if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
189  errorFound = false;
190  }
191  if constexpr (debug)
192  if (errorFound)
193  printf("Invalid ROC = 25 found (errorType = 25)\n");
194  break;
195  }
196  case 26: {
197  if constexpr (debug)
198  printf("Gap word found (errorType = 26)\n");
199  break;
200  }
201  case 27: {
202  if constexpr (debug)
203  printf("Dummy word found (errorType = 27)\n");
204  break;
205  }
206  case 28: {
207  if constexpr (debug)
208  printf("Error fifo nearly full (errorType = 28)\n");
209  errorFound = true;
210  break;
211  }
212  case 29: {
213  if constexpr (debug)
214  printf("Timeout on a channel (errorType = 29)\n");
    // NOTE(review): listing line 215 is missing from this extract; it presumably opens the
    // conditional block that is closed by the brace on listing line 219 — confirm.
216  if constexpr (debug)
217  printf("...2nd errorType=29 error, skip\n");
218  break;
219  }
220  errorFound = true;
221  break;
222  }
223  case 30: {
224  if constexpr (debug)
225  printf("TBM error trailer (errorType = 30)\n");
    // extract the 4 state bits located at bits 8..11 of the word
226  int stateMatch_bits = 4;
227  int stateMatch_shift = 8;
228  uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
229  int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
230  if (stateMatch != 1 && stateMatch != 8) {
231  if constexpr (debug)
232  printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
233  break;
234  }
235  if (stateMatch == 1)
236  errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30
237  errorFound = true;
238  break;
239  }
240  case 31: {
241  if constexpr (debug)
242  printf("Event number error (errorType = 31)\n");
243  errorFound = true;
244  break;
245  }
246  default:
247  errorFound = false;
248  };
249 
250  return errorFound ? errorType : 0;
251  }
252 
253  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
254  template <bool debug = false>
255  ALPAKA_FN_ACC uint32_t
256  getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
257  uint32_t rID = 0xffffffff;
258 
259  switch (errorType) {
260  case 25:
261  case 29:
262  case 30:
263  case 31:
264  case 36:
265  case 40: {
266  uint32_t roc = 1;
267  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
268  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
269  if (rID_temp != ::pixelClustering::invalidModuleId)
270  rID = rID_temp;
271  break;
272  }
273  case 37:
274  case 38: {
275  uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
276  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
277  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
278  if (rID_temp != ::pixelClustering::invalidModuleId)
279  rID = rID_temp;
280  break;
281  }
282  default:
283  break;
284  };
285 
286  return rID;
287  }
288 
// Kernel functor that unpacks raw pixel FED words into digis, one word per loop iteration.
// NOTE(review): listing line 291, which should carry the struct header that the first template
// line below belongs to, is missing from this extract (the host-side debug message calls the
// kernel RawToDigi_kernel — confirm the name against the original source).
//
// For every index gIndex produced by cms::alpakatools::uniform_elements(acc, wordCounter) the
// operator:
//   1. resets the digi entry (xx, yy, adc, pdigi and rawIdArr to 0, moduleId to
//      ::pixelClustering::invalidModuleId) and the error entry;
//   2. skips words equal to 0;
//   3. classifies the word with checkROC(); when includeErrors is set and the word is flagged
//      (roc >= maxROCIndex and a non-zero error type), the error is stored if getErrRawID()
//      returns something other than 0xffffffff, and the word is skipped;
//   4. handles spurious channels (the condition itself is on a missing listing line);
//   5. skips the word when the ROC is marked in cablingMap.badRocs() (only if useQualityInfo)
//      or in modToUnp;
//   6. decodes the local pixel position — row/col directly when layer == 1, dcol/pxid
//      otherwise — and, when includeErrors is set, records an error and skips invalid positions;
//   7. converts to module coordinates with frameConversion() and fills xx, yy, adc, pdigi,
//      moduleId and rawIdArr.
//
// Parameters (as far as visible here):
//   acc            - Alpaka accelerator
//   cablingMap     - cabling map view used for all detector lookups
//   modToUnp       - per-ROC flags, indexed like cablingMap.badRocs(); non-zero skips the ROC
//   wordCounter    - number of raw words to process
//   word           - the raw 32-bit words
//   fedIds         - FED ids, one per pair of words (read at gIndex / 2)
//   digisView      - output digis
//   err            - output errors; its declaration is on missing listing line 300 (the page's
//                    own signature summary lists `SiPixelDigiErrorsSoAView err` at that position)
//   useQualityInfo - enables the badRocs() check
//   includeErrors  - enables error recording in steps 3 and 6
289  // Kernel to perform Raw to Digi conversion
290  template <bool debug = false>
292  template <typename TAcc>
293  ALPAKA_FN_ACC void operator()(const TAcc &acc,
294  const SiPixelMappingSoAConstView &cablingMap,
295  const unsigned char *modToUnp,
296  const uint32_t wordCounter,
297  const uint32_t *word,
298  const uint8_t *fedIds,
299  SiPixelDigisSoAView digisView,
301  bool useQualityInfo,
302  bool includeErrors) const {
303  // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
    // NOTE(review): listing line 304 is missing from this extract; it presumably guards the
    // assignment below (e.g. so that only one thread performs it) — confirm.
305  err.size() = 0;
306 
307  for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
308  auto dvgi = digisView[gIndex];
309  dvgi.xx() = 0;
310  dvgi.yy() = 0;
311  dvgi.adc() = 0;
312 
313  // initialise the errors
314  err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
315 
316  uint8_t fedId = fedIds[gIndex / 2]; // +1200;
317 
318  // initialize (too many continue below)
319  dvgi.pdigi() = 0;
320  dvgi.rawIdArr() = 0;
321  dvgi.moduleId() = ::pixelClustering::invalidModuleId;
322 
323  uint32_t ww = word[gIndex]; // Array containing 32 bit raw data
324  if (ww == 0) {
325  // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
326  continue;
327  }
328 
329  uint32_t link = sipixelconstants::getLink(ww); // Extract link
330  uint32_t roc = sipixelconstants::getROC(ww); // Extract ROC in link
331 
332  uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
    // a word is only flagged when its ROC field is at or above maxROCIndex and checkROC found an error
333  bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
334  if (includeErrors and skipROC) {
335  uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
336  if (rawId != 0xffffffff) // Store errors only for valid DetIds
337  {
338  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
339  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
340  }
341  continue;
342  }
343 
344  // Check for spurious channels
    // NOTE(review): listing line 345 (the condition opening this block) is missing from this extract.
346  uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
347  if constexpr (debug) {
348  printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
349  }
350  if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
351  uint8_t error = conversionError<debug>(fedId, 2);
352  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
353  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
354  }
355  continue;
356  }
357 
    // NOTE(review): listing line 359 (the initialiser of index) is missing from this extract.
358  uint32_t index =
360  if (useQualityInfo) {
361  skipROC = cablingMap.badRocs()[index];
362  if (skipROC)
363  continue;
364  }
365  skipROC = modToUnp[index];
366  if (skipROC)
367  continue;
368 
369  ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
370  uint32_t rawId = detId.rawId;
371  uint32_t layer = 0;
372  int side = 0, panel = 0, module = 0;
373  bool barrel = isBarrel(rawId);
374 
375  if (barrel) {
    // NOTE(review): listing lines 376-377 are missing; they presumably set layer and module from rawId.
378  side = (module < 5) ? -1 : 1;
379  } else {
380  // endcap ids
381  layer = 0;
    // NOTE(review): listing line 382 is missing; it presumably sets panel from rawId.
383  side = (panel == 1) ? -1 : 1;
384  }
385 
386  ::pixelDetails::Pixel localPix;
387  if (layer == 1) {
388  // Special case of barrel layer 1
389  uint32_t col = sipixelconstants::getCol(ww);
390  uint32_t row = sipixelconstants::getRow(ww);
391  localPix.row = row;
392  localPix.col = col;
393  if (includeErrors and not rocRowColIsValid(row, col)) {
394  uint8_t error = conversionError<debug>(fedId, 3);
395  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
396  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
397  if constexpr (debug)
398  printf("BPIX1 Error status: %i\n", error);
399  continue;
400  }
401  } else {
402  // Other layers with double columns
403  uint32_t dcol = sipixelconstants::getDCol(ww);
404  uint32_t pxid = sipixelconstants::getPxId(ww);
    // two pixels per double-column step: pxid / 2 selects the row, pxid % 2 the column within the pair
405  uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
406  uint32_t col = dcol * 2 + pxid % 2;
407  localPix.row = row;
408  localPix.col = col;
409  if (includeErrors and not dcolIsValid(dcol, pxid)) {
410  uint8_t error = conversionError<debug>(fedId, 3);
411  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
412  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
413  if constexpr (debug)
414  printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
415  continue;
416  }
417  }
418 
419  ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
420  dvgi.xx() = globalPix.row; // origin shifting by 1 0-159
421  dvgi.yy() = globalPix.col; // origin shifting by 1 0-415
422  dvgi.adc() = sipixelconstants::getADC(ww);
423  dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
424  dvgi.moduleId() = detId.moduleId;
425  dvgi.rawIdArr() = rawId;
426  } // end of stride on grid
427 
428  } // end of Raw to Digi kernel operator()
429  }; // end of Raw to Digi struct
430 
// Kernel functor that turns the per-module cluster counts into cumulative start offsets
// (clusModuleStart), working in chunks of 1024 modules.
// NOTE(review): many listing lines of this definition are missing from the extract (432, 436-438,
// 444, 448, 452, 457, 460, 464, 466, 498, 509-510): the struct header, several local
// declarations, the loop headers and the names of the scan calls are not visible. The notes
// below mark each gap; anything said about the missing lines is an assumption to confirm.
//
// Visible behaviour:
//   - asserts TrackerTraits::numberOfModules < 2048 and, unless NDEBUG is defined, that the
//     calling block has index 0;
//   - writes min(maxHitsInModule, clusInModule of module i) into clusModuleStart of entry i + 1;
//   - runs scans in place over clusModuleStart + 1 (1024 entries) and clusModuleStart + 1025
//     (leftModules entries), plus two more chunks when isPhase2, sharing a 32-word workspace;
//   - adds clusModuleStart[1024] to entries 1025..lastModule-1 (and the equivalent for the
//     Phase 2 chunks), synchronising the block after each pass;
//   - finally clamps every visited clusModuleStart entry to MAX_HITS.
431  template <typename TrackerTraits>
433  template <typename TAcc>
434  ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
    // NOTE(review): the Phase 2 branches further down address entries beyond 3072, which looks
    // inconsistent with this bound — confirm how the assertion is meant to hold for Phase 2.
435  ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024
436 
    // NOTE(review): listing lines 437-438 are missing; they presumably declare maxHitsInModule and
    // numberOfModules, which are used below.
439 
440 #ifndef NDEBUG
441  [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
442  ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
443  [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
    // NOTE(review): listing line 444 is missing; presumably an assertion on gridDimension.
445 #endif
446 
447  // limit to maxHitsInModule;
    // NOTE(review): listing line 448 (the loop header defining i) is missing.
449  clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
450  }
451 
    // NOTE(review): listing line 452 is missing; presumably the definition of isPhase2.
453  constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;
454 
    // workspace shared by the threads of the block and reused by every scan below
455  auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
456 
    // NOTE(review): listing lines 457, 460, 464 and 466 (the callee names of the four calls whose
    // argument lists follow) are missing; presumably cms::alpakatools::blockPrefixScan.
458  acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
459 
461  acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
462 
463  if constexpr (isPhase2) {
465  acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
467  clus_view.clusModuleStart() + 3072 + 1,
468  clus_view.clusModuleStart() + 3072 + 1,
469  numberOfModules - 3072,
470  ws);
471  }
472 
    // each chunk was scanned on its own, so the total of the previous chunk is added to the next one
473  constexpr auto lastModule = isPhase2 ? 2049u : numberOfModules + 1;
474  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 1025u, lastModule)) {
475  clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
476  }
477  alpaka::syncBlockThreads(acc);
478 
479  if constexpr (isPhase2) {
480  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 2049u, 3073u)) {
481  clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
482  }
483  alpaka::syncBlockThreads(acc);
484 
485  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 3073u, numberOfModules + 1)) {
486  clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
487  }
488  alpaka::syncBlockThreads(acc);
489  }
490 #ifdef GPU_DEBUG
    // NOTE(review): these checks read moduleStart() while the code above fills clusModuleStart() —
    // confirm that this is intended.
491  ALPAKA_ASSERT_ACC(0 == clus_view[0].moduleStart());
492  auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart());
493  ALPAKA_ASSERT_ACC(c0 == clus_view[1].moduleStart());
494  ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
495  ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
496  ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart());
497 
    // NOTE(review): listing line 498 (the loop header defining i) is missing.
499  if (0 != i)
500  ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
501  // Check BPX2 (1), FP1 (4)
502  constexpr auto bpix2 = TrackerTraits::layerStart[1];
503  constexpr auto fpix1 = TrackerTraits::layerStart[4];
504  if (i == bpix2 || i == fpix1)
505  printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
506  }
507 #endif
508  // avoid overflow
    // NOTE(review): listing lines 509-510 are missing; presumably the definition of MAX_HITS and the
    // loop header defining i.
511  if (clus_view[i].clusModuleStart() > MAX_HITS)
512  clus_view[i].clusModuleStart() = MAX_HITS;
513  }
514 
515  } // end of FillHitsModuleStart kernel operator()
516  }; // end of FillHitsModuleStart struct
517 
// Host-side entry point: enqueues raw-to-digi unpacking followed by the whole clustering chain
// on `queue`.
// NOTE(review): the line carrying the function name (listing line 520) and the `gains` parameter
// (listing line 525) are missing from this extract; the page's own signature summary gives
//   void makePhase1ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds,
//       const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp,
//       const SiPixelGainCalibrationForHLTSoAConstView &gains, const WordFedAppender &wordFed,
//       const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo,
//       bool includeErrors, bool debug)
// Several other listing lines are missing as well (537, 542, 563, 576, 589, 615, 624, 632, 654,
// 662, 679, 691); each gap is marked below.
//
// Steps visible here:
//   1. allocate digis_d (and digiErrors_d when includeErrors) for wordCounter entries;
//   2. if wordCounter is non-zero, copy the words and FED ids to device buffers and launch the
//      raw-to-digi kernel (debug or non-debug instantiation);
//   3. launch a calibration kernel, CountModules, FindClus and a charge-cut kernel, then
//      FillHitsModuleStart on a single block;
//   4. enqueue copies into nModules_Clusters_h: [0] first element of moduleStart,
//      [1] clusModuleStart[numberOfModules], [2] clusModuleStart[startBPIX2].
// fedCounter is not used in the lines visible here.
518  // Interface to outside
519  template <typename TrackerTraits>
521  Queue &queue,
522  const SiPixelClusterThresholds clusterThresholds,
523  const SiPixelMappingSoAConstView &cablingMap,
524  const unsigned char *modToUnp,
526  const WordFedAppender &wordFed,
527  const uint32_t wordCounter,
528  const uint32_t fedCounter,
529  bool useQualityInfo,
530  bool includeErrors,
531  bool debug) {
532  nDigis = wordCounter;
533 
534 #ifdef GPU_DEBUG
535  std::cout << "decoding " << wordCounter << " digis." << std::endl;
536 #endif
    // NOTE(review): listing line 537 is missing.
538  digis_d = SiPixelDigisSoACollection(wordCounter, queue);
    // NOTE(review): digiErrors_d is only assigned when includeErrors is set, yet both kernel
    // launches below call digiErrors_d->view() unconditionally — confirm that this is safe when
    // includeErrors is false.
539  if (includeErrors) {
540  digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
541  }
    // NOTE(review): listing line 542 is missing; clusters_d, used further down, is presumably
    // created there.
543  // protect in case of empty event....
544  if (wordCounter) {
545  const int threadsPerBlockOrElementsPerThread =
546  cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
547  // fill it all
548  const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
549  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
    // the FED ids are stored one per pair of words, so the word count must be even
550  assert(0 == wordCounter % 2);
551  // wordCounter is the total no of words in each event to be transferred on device
552  auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
553  // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
554  // However, only the first half of elements end up eventually used:
555  // hence, here, only wordCounter/2 elements are allocated.
556  auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
557  alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
558  alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
559  // Launch rawToDigi kernel
    // NOTE(review): listing lines 563 and 576 (the kernel objects passed to the two launches) are
    // missing; presumably the debug and non-debug instantiations of the raw-to-digi kernel.
560  if (debug) {
561  alpaka::exec<Acc1D>(queue,
562  workDiv,
564  cablingMap,
565  modToUnp,
566  wordCounter,
567  word_d.data(),
568  fedId_d.data(),
569  digis_d->view(),
570  digiErrors_d->view(),
571  useQualityInfo,
572  includeErrors);
573  } else {
574  alpaka::exec<Acc1D>(queue,
575  workDiv,
577  cablingMap,
578  modToUnp,
579  wordCounter,
580  word_d.data(),
581  fedId_d.data(),
582  digis_d->view(),
583  digiErrors_d->view(),
584  useQualityInfo,
585  includeErrors);
586  }
587 
588 #ifdef GPU_DEBUG
    // NOTE(review): listing line 589 is missing; presumably a wait on the queue before printing.
590  std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
591 #endif
592  }
593  // End of Raw2Digi and passing data for clustering
594 
595  {
596  // clusterizer
597  using namespace pixelClustering;
598  // calibrations
599  using namespace calibPixel;
600  const int threadsPerBlockOrElementsPerThread = []() {
601  if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
602  // NB: IMPORTANT: This could be tuned to benefit from innermost loop.
603  return 32;
604  } else {
605  return 256;
606  }
607  }();
    // enough work items to cover both the digis and the modules
608  const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
609  threadsPerBlockOrElementsPerThread);
610  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
611 
    // NOTE(review): listing lines 615 and 624 (the kernel objects passed to the two launches) are
    // missing; presumably the debug and non-debug instantiations of a digi calibration kernel.
612  if (debug) {
613  alpaka::exec<Acc1D>(queue,
614  workDiv,
616  clusterThresholds,
617  digis_d->view(),
618  clusters_d->view(),
619  gains,
620  wordCounter);
621  } else {
622  alpaka::exec<Acc1D>(queue,
623  workDiv,
625  clusterThresholds,
626  digis_d->view(),
627  clusters_d->view(),
628  gains,
629  wordCounter);
630  }
631 #ifdef GPU_DEBUG
    // NOTE(review): listing line 632 is missing.
633  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
634  << " threadsPerBlockOrElementsPerThread\n";
635 #endif
636 
637  alpaka::exec<Acc1D>(
638  queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
639 
    // copy the first element of moduleStart into nModules_Clusters_h (printed below as the number
    // of active modules)
640  auto moduleStartFirstElement =
641  cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
642  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
643 
644  const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
645  const auto workDivMaxNumModules =
646  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
647 #ifdef GPU_DEBUG
648  std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
649  << " threadsPerBlockOrElementsPerThread\n";
650 #endif
651  alpaka::exec<Acc1D>(
652  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
653 #ifdef GPU_DEBUG
    // NOTE(review): listing line 654 is missing.
655 #endif
656 
657  constexpr auto threadsPerBlockChargeCut = 256;
658  const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
659  // apply charge cut
    // NOTE(review): listing line 662 (the kernel object of this launch) is missing.
660  alpaka::exec<Acc1D>(queue,
661  workDivChargeCut,
663  digis_d->view(),
664  clusters_d->view(),
665  clusterThresholds,
666  wordCounter);
667  // count the module start indices already here (instead of
668  // rechits) so that the number of clusters/hits can be made
669  // available in the rechit producer without additional points of
670  // synchronization/ExternalWork
671 
672  // MUST be ONE block
673  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
674  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
675 
676  // last element holds the number of all clusters
677  const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
678  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
    // NOTE(review): listing line 679 is missing; startBPIX2, used below, is presumably defined there.
680 
681  // element startBPIX2 hold the number of clusters until BPIX2
682  const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
683  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
684  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
685  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
686 
687  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
688  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
689 
690 #ifdef GPU_DEBUG
    // NOTE(review): listing line 691 is missing; presumably a wait on the queue, since the host
    // values printed below are filled by the copies enqueued above.
692  std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
693  << " > no. of digis: " << nDigis << std::endl
694  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
695  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
696  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
697 #endif
698 
699  } // end clusterizer scope
700  }
701 
// Host-side entry point for Phase 2: enqueues the clustering chain on `queue` for digis that
// are already available in digis_view.
// NOTE(review): the line carrying the function name (listing line 703) is missing from this
// extract; the page's own signature summary gives
//   void makePhase2ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds,
//       SiPixelDigisSoAView &digis_view, const uint32_t numDigis)
// Further listing lines are missing as well (711-712, 722, 737, 744, 750, 768, 779); each gap
// is marked below.
//
// Steps visible here: calibPixel::CalibDigisPhase2, CountModules, FindClus, a charge-cut kernel
// and FillHitsModuleStart (single block) are launched in that order, and three values are
// copied into nModules_Clusters_h: [0] first element of moduleStart,
// [1] clusModuleStart[numberOfModules], [2] clusModuleStart[startBPIX2].
702  template <typename TrackerTraits>
704  Queue &queue,
705  const SiPixelClusterThresholds clusterThresholds,
706  SiPixelDigisSoAView &digis_view,
707  const uint32_t numDigis) {
708  using namespace pixelClustering;
709  using pixelTopology::Phase2;
710  nDigis = numDigis;
    // NOTE(review): listing lines 711-712 are missing; clusters_d and numberOfModules, used below,
    // are presumably set up there.
713  const auto threadsPerBlockOrElementsPerThread = 512;
    // enough work items to cover both the digis and the modules
714  const auto blocks =
715  cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
716  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
717 
718  alpaka::exec<Acc1D>(
719  queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
720 
721 #ifdef GPU_DEBUG
    // NOTE(review): listing line 722 is missing.
723  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
724  << " threadsPerBlockOrElementsPerThread\n";
725 #endif
    // NOTE(review): CountModules is instantiated with pixelTopology::Phase2 while FindClus and
    // FillHitsModuleStart below use TrackerTraits — confirm the two always coincide here.
726  alpaka::exec<Acc1D>(
727  queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
728 
729  auto moduleStartFirstElement =
730  cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
731  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
732 
733  const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
734  const auto workDivMaxNumModules =
735  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
736 #ifdef GPU_DEBUG
    // NOTE(review): listing line 737 is missing.
738  std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
739  << " threadsPerBlockOrElementsPerThread\n";
740 #endif
741  alpaka::exec<Acc1D>(
742  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
743 #ifdef GPU_DEBUG
    // NOTE(review): listing line 744 is missing.
745 #endif
746 
747  // apply charge cut
    // NOTE(review): listing line 750 (the kernel object of this launch) is missing.
748  alpaka::exec<Acc1D>(queue,
749  workDivMaxNumModules,
751  digis_view,
752  clusters_d->view(),
753  clusterThresholds,
754  numDigis);
755 
756  // count the module start indices already here (instead of
757  // rechits) so that the number of clusters/hits can be made
758  // available in the rechit producer without additional points of
759  // synchronization/ExternalWork
760 
761  // MUST be ONE block
762  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
763  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
764 
765  // last element holds the number of all clusters
766  const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
767  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
    // NOTE(review): listing line 768 is missing; startBPIX2, used below, is presumably defined there.
769  // element startBPIX2 hold the number of clusters until BPIX2
770  const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
771  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
772  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
773  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
774 
775  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
776  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
777 
778 #ifdef GPU_DEBUG
    // NOTE(review): listing line 779 is missing; presumably a wait on the queue, since the host
    // values printed below are filled by the copies enqueued above.
780  std::cout << "SiPixelPhase2DigiToCluster: results \n"
781  << " > no. of digis: " << numDigis << std::endl
782  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
783  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
784  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
785 #endif
786  } //
787 
791 
792  } // namespace pixelDetails
793 
794 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
const uint32_t gridDimension(alpaka::getWorkDiv< alpaka::Grid, alpaka::Blocks >(acc)[0u])
def pack(high, low)
void makePhase1ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const SiPixelGainCalibrationForHLTSoAConstView &gains, const WordFedAppender &wordFed, const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, bool debug)
constexpr uint32_t moduleStartBit
constexpr uint32_t getRow(uint32_t ww)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:303
constexpr uint32_t ERROR_mask
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:19
constexpr uint32_t getCol(uint32_t ww)
constexpr auto MAX_LINK
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
constexpr uint32_t layerMask
constexpr uint32_t numRowsInRoc
constexpr uint32_t ROC_shift
static const int kSubdetOffset
Definition: DetId.h:22
std::enable_if_t< not std::is_array_v< T >, device_view< TDev, T > > make_device_view(TDev const &device, T &data)
Definition: memory.h:260
assert(be >=bs)
static constexpr uint16_t numberOfModules
void makePhase2ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, SiPixelDigisSoAView &digis_view, const uint32_t numDigis)
constexpr uint16_t numberOfModules
ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap)
constexpr uint32_t panelMask
ALPAKA_FN_ACC void operator()(const TAcc &acc, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, SiPixelDigisSoAView digisView, SiPixelDigiErrorsSoAView err, bool useQualityInfo, bool includeErrors) const
constexpr uint32_t getPxId(uint32_t ww)
uint64_t word
static constexpr uint32_t const * layerStart
ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local)
constexpr int startBPIX2
static const int kSubdetMask
Definition: DetId.h:20
ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid)
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
constexpr uint32_t getDCol(uint32_t ww)
constexpr uint32_t OMIT_ERR_shift
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigisHost, SiPixelDigisDevice< Device > > SiPixelDigisSoACollection
constexpr uint32_t getADC(uint32_t ww)
std::vector< Block > Blocks
Definition: Block.h:99
constexpr uint32_t getLink(uint32_t ww)
constexpr unsigned int MAX_ROC
constexpr uint32_t ROC_mask
constexpr uint32_t moduleMask
constexpr uint32_t maxROCIndex
constexpr uint16_t invalidModuleId
ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, uint8_t fed, uint32_t link, uint32_t roc)
constexpr uint32_t getROC(uint32_t ww)
constexpr auto MAX_ROC
constexpr float gains[NGAINS]
Definition: EcalConstants.h:20
ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap)
#define debug
Definition: HDRShower.cc:19
constexpr uint32_t LINK_shift
constexpr uint32_t panelStartBit
ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol)
constexpr uint32_t layerStartBit
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
ALPAKA_FN_ACC constexpr bool once_per_grid(TAcc const &acc)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
static constexpr uint32_t layerStart[numberOfLayers+1]
constexpr uint32_t OMIT_ERR_mask
col
Definition: cuy.py:1009
ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status)
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigiErrorsHost, SiPixelDigiErrorsDevice< Device > > SiPixelDigiErrorsSoACollection
constexpr uint32_t numColsInRoc
std::enable_if_t< not std::is_array_v< T >, host_view< T > > make_host_view(T &data)
Definition: memory.h:153
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const
constexpr uint32_t LINK_mask
constexpr unsigned int MAX_LINK
constexpr uint16_t invalidModuleId
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelClustersHost, SiPixelClustersDevice< Device > > SiPixelClustersSoACollection
constexpr uint32_t maxHitsInModule()