CMS 3D CMS Logo

SiPixelRawToClusterKernel.dev.cc
Go to the documentation of this file.
1 // C++ includes
2 #include <algorithm>
3 #include <cassert>
4 #include <cstdint>
5 #include <cstdio>
6 #include <type_traits>
7 
8 // Alpaka includes
9 #include <alpaka/alpaka.hpp>
10 
11 // CMSSW includes
31 
32 // local includes
33 #include "CalibPixel.h"
34 #include "ClusterChargeCut.h"
35 #include "PixelClustering.h"
37 
38 // #define GPU_DEBUG
39 
41  namespace pixelDetails {
42 
43  ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
45  }
46 
47  ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
48  uint8_t fed,
49  uint32_t link,
50  uint32_t roc) {
51  using namespace ::pixelDetails;
52  uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
53  DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
54  return detId;
55  }
56 
57  //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
58  //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
59  // Convert local pixel to pixelDetails::global pixel
60  ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
61  bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
62  int slopeRow = 0, slopeCol = 0;
63  int rowOffset = 0, colOffset = 0;
64 
65  if (bpix) {
66  if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
67  if (rocIdInDetUnit < 8) {
68  slopeRow = 1;
69  slopeCol = -1;
70  rowOffset = 0;
71  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
72  } else {
73  slopeRow = -1;
74  slopeCol = 1;
75  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
76  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
77  } // if roc
78  } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
79  if (rocIdInDetUnit < 8) {
80  slopeRow = -1;
81  slopeCol = 1;
82  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
83  colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
84  } else {
85  slopeRow = 1;
86  slopeCol = -1;
87  rowOffset = 0;
88  colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
89  }
90  }
91 
92  } else { // fpix
93  if (side == -1) { // pannel 1
94  if (rocIdInDetUnit < 8) {
95  slopeRow = 1;
96  slopeCol = -1;
97  rowOffset = 0;
98  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
99  } else {
100  slopeRow = -1;
101  slopeCol = 1;
102  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
103  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
104  }
105  } else { // pannel 2
106  if (rocIdInDetUnit < 8) {
107  slopeRow = 1;
108  slopeCol = -1;
109  rowOffset = 0;
110  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
111  } else {
112  slopeRow = -1;
113  slopeCol = 1;
114  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
115  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
116  }
117 
118  } // side
119  }
120 
121  uint32_t gRow = rowOffset + slopeRow * local.row;
122  uint32_t gCol = colOffset + slopeCol * local.col;
123  // inside frameConversion row: gRow, column: gCol
124  ::pixelDetails::Pixel global = {gRow, gCol};
125  return global;
126  }
127 
128  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
129  template <bool debug = false>
130  ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
131  uint8_t errorType = 0;
132 
133  switch (status) {
134  case 1: {
135  if constexpr (debug)
136  printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
137  errorType = 35;
138  break;
139  }
140  case 2: {
141  if constexpr (debug)
142  printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
143  errorType = 36;
144  break;
145  }
146  case 3: {
147  if constexpr (debug)
148  printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
149  errorType = 37;
150  break;
151  }
152  case 4: {
153  if constexpr (debug)
154  printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
155  errorType = 38;
156  break;
157  }
158  default:
159  if constexpr (debug)
160  printf("Cabling check returned unexpected result, status = %i\n", status);
161  };
162 
163  return errorType;
164  }
165 
166  ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
168  return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
169  }
170 
171  ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
172 
173  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
174  template <bool debug = false>
175  ALPAKA_FN_ACC uint8_t
176  checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
177  uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
178  if (errorType < 25)
179  return 0;
180  bool errorFound = false;
181 
182  switch (errorType) {
183  case 25: {
184  errorFound = true;
185  uint32_t index =
187  if (index > 1 && index <= cablingMap.size()) {
188  if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
189  errorFound = false;
190  }
191  if constexpr (debug)
192  if (errorFound)
193  printf("Invalid ROC = 25 found (errorType = 25)\n");
194  break;
195  }
196  case 26: {
197  if constexpr (debug)
198  printf("Gap word found (errorType = 26)\n");
199  break;
200  }
201  case 27: {
202  if constexpr (debug)
203  printf("Dummy word found (errorType = 27)\n");
204  break;
205  }
206  case 28: {
207  if constexpr (debug)
208  printf("Error fifo nearly full (errorType = 28)\n");
209  errorFound = true;
210  break;
211  }
212  case 29: {
213  if constexpr (debug)
214  printf("Timeout on a channel (errorType = 29)\n");
216  if constexpr (debug)
217  printf("...2nd errorType=29 error, skip\n");
218  break;
219  }
220  errorFound = true;
221  break;
222  }
223  case 30: {
224  if constexpr (debug)
225  printf("TBM error trailer (errorType = 30)\n");
226  int stateMatch_bits = 4;
227  int stateMatch_shift = 8;
228  uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
229  int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
230  if (stateMatch != 1 && stateMatch != 8) {
231  if constexpr (debug)
232  printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
233  break;
234  }
235  if (stateMatch == 1)
236  errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30
237  errorFound = true;
238  break;
239  }
240  case 31: {
241  if constexpr (debug)
242  printf("Event number error (errorType = 31)\n");
243  errorFound = true;
244  break;
245  }
246  default:
247  errorFound = false;
248  };
249 
250  return errorFound ? errorType : 0;
251  }
252 
253  // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
254  template <bool debug = false>
255  ALPAKA_FN_ACC uint32_t
256  getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
257  uint32_t rID = 0xffffffff;
258 
259  switch (errorType) {
260  case 25:
261  case 29:
262  case 30:
263  case 31:
264  case 36:
265  case 40: {
266  uint32_t roc = 1;
267  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
268  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
269  if (rID_temp != ::pixelClustering::invalidModuleId)
270  rID = rID_temp;
271  break;
272  }
273  case 37:
274  case 38: {
275  uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
276  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
277  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
278  if (rID_temp != ::pixelClustering::invalidModuleId)
279  rID = rID_temp;
280  break;
281  }
282  default:
283  break;
284  };
285 
286  return rID;
287  }
288 
289  // Kernel to perform Raw to Digi conversion
290  template <bool debug = false>
292  template <typename TAcc>
293  ALPAKA_FN_ACC void operator()(const TAcc &acc,
294  const SiPixelMappingSoAConstView &cablingMap,
295  const unsigned char *modToUnp,
296  const uint32_t wordCounter,
297  const uint32_t *word,
298  const uint8_t *fedIds,
299  SiPixelDigisSoAView digisView,
301  bool useQualityInfo,
302  bool includeErrors) const {
303  // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
305  err.size() = 0;
306 
307  for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
308  auto dvgi = digisView[gIndex];
309  dvgi.xx() = 0;
310  dvgi.yy() = 0;
311  dvgi.adc() = 0;
312 
313  // initialise the errors
314  err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
315 
316  uint8_t fedId = fedIds[gIndex / 2]; // +1200;
317 
318  // initialize (too many coninue below)
319  dvgi.pdigi() = 0;
320  dvgi.rawIdArr() = 0;
321  dvgi.moduleId() = ::pixelClustering::invalidModuleId;
322 
323  uint32_t ww = word[gIndex]; // Array containing 32 bit raw data
324  if (ww == 0) {
325  // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
326  continue;
327  }
328 
329  uint32_t link = sipixelconstants::getLink(ww); // Extract link
330  uint32_t roc = sipixelconstants::getROC(ww); // Extract ROC in link
331 
332  uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
333  bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
334  if (includeErrors and skipROC) {
335  uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
336  if (rawId != 0xffffffff) // Store errors only for valid DetIds
337  {
338  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
339  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
340  }
341  continue;
342  }
343 
344  // Check for spurious channels
346  uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
347  if constexpr (debug) {
348  printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
349  }
350  if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
351  uint8_t error = conversionError<debug>(fedId, 2);
352  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
353  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
354  }
355  continue;
356  }
357 
358  uint32_t index =
360  if (useQualityInfo) {
361  skipROC = cablingMap.badRocs()[index];
362  if (skipROC)
363  continue;
364  }
365  skipROC = modToUnp[index];
366  if (skipROC)
367  continue;
368 
369  ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
370  uint32_t rawId = detId.rawId;
371  uint32_t layer = 0;
372  int side = 0, panel = 0, module = 0;
373  bool barrel = isBarrel(rawId);
374 
375  if (barrel) {
378  side = (module < 5) ? -1 : 1;
379  } else {
380  // endcap ids
381  layer = 0;
383  side = (panel == 1) ? -1 : 1;
384  }
385 
386  ::pixelDetails::Pixel localPix;
387  if (layer == 1) {
388  // Special case of barrel layer 1
389  uint32_t col = sipixelconstants::getCol(ww);
390  uint32_t row = sipixelconstants::getRow(ww);
391  localPix.row = row;
392  localPix.col = col;
393  if (includeErrors and not rocRowColIsValid(row, col)) {
394  uint8_t error = conversionError<debug>(fedId, 3);
395  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
396  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
397  if constexpr (debug)
398  printf("BPIX1 Error status: %i\n", error);
399  continue;
400  }
401  } else {
402  // Other layers with double columns
403  uint32_t dcol = sipixelconstants::getDCol(ww);
404  uint32_t pxid = sipixelconstants::getPxId(ww);
405  uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
406  uint32_t col = dcol * 2 + pxid % 2;
407  localPix.row = row;
408  localPix.col = col;
409  if (includeErrors and not dcolIsValid(dcol, pxid)) {
410  uint8_t error = conversionError<debug>(fedId, 3);
411  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
412  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
413  if constexpr (debug)
414  printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
415  continue;
416  }
417  }
418 
419  ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
420  dvgi.xx() = globalPix.row; // origin shifting by 1 0-159
421  dvgi.yy() = globalPix.col; // origin shifting by 1 0-415
422  dvgi.adc() = sipixelconstants::getADC(ww);
423  dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
424  dvgi.moduleId() = detId.moduleId;
425  dvgi.rawIdArr() = rawId;
426  } // end of stride on grid
427 
428  } // end of Raw to Digi kernel operator()
429  }; // end of Raw to Digi struct
430 
431  template <typename TrackerTraits>
433  template <typename TAcc>
434  ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
436 
437  // For Phase1 there are 1856 pixel modules
438  // For Phase2 there are 3872 pixel modules
439  // For whichever setup with more modules it would be
440  // easy to extend at least till 32*1024
441 
442  constexpr uint16_t prefixScanUpperLimit = isPhase2 ? 4096 : 2048;
443  ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < prefixScanUpperLimit);
444 
447 
448 #ifndef NDEBUG
449  [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
450  ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
451  [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
452  ALPAKA_ASSERT_ACC(1 == gridDimension);
453 #endif
454 
455  // limit to maxHitsInModule;
457  clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
458  }
459 
460  constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;
461 
462  auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
463 
465  acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
466 
468  acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
469 
470  if constexpr (isPhase2) {
472  acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
474  clus_view.clusModuleStart() + 3072 + 1,
475  clus_view.clusModuleStart() + 3072 + 1,
476  numberOfModules - 3072,
477  ws);
478  }
479 
480  constexpr auto lastModule = isPhase2 ? 2049u : numberOfModules + 1;
481  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 1025u, lastModule)) {
482  clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
483  }
484  alpaka::syncBlockThreads(acc);
485 
486  if constexpr (isPhase2) {
487  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 2049u, 3073u)) {
488  clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
489  }
490  alpaka::syncBlockThreads(acc);
491 
492  for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 3073u, numberOfModules + 1)) {
493  clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
494  }
495  alpaka::syncBlockThreads(acc);
496  }
497 #ifdef GPU_DEBUG
498  ALPAKA_ASSERT_ACC(0 == clus_view[1].moduleStart());
499  auto c0 = std::min(maxHitsInModule, clus_view[2].clusModuleStart());
500  ALPAKA_ASSERT_ACC(c0 == clus_view[2].moduleStart());
501  ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
502  ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
503  ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart());
504 
506  if (0 != i)
507  ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
508  // Check BPX2 (1), FP1 (4)
509  constexpr auto bpix2 = TrackerTraits::layerStart[1];
510  constexpr auto fpix1 = TrackerTraits::layerStart[4];
511  if (i == bpix2 || i == fpix1)
512  printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
513  }
514 
515 #endif
516 
517  } // end of FillHitsModuleStart kernel operator()
518  }; // end of FillHitsModuleStart struct
519 
520  // Interface to outside
521  template <typename TrackerTraits>
523  Queue &queue,
524  const SiPixelClusterThresholds clusterThresholds,
525  const SiPixelMappingSoAConstView &cablingMap,
526  const unsigned char *modToUnp,
528  const WordFedAppender &wordFed,
529  const uint32_t wordCounter,
530  const uint32_t fedCounter,
531  bool useQualityInfo,
532  bool includeErrors,
533  bool debug) {
534  nDigis = wordCounter;
535 
536 #ifdef GPU_DEBUG
537  std::cout << "decoding " << wordCounter << " digis." << std::endl;
538 #endif
540  digis_d = SiPixelDigisSoACollection(wordCounter, queue);
541  if (includeErrors) {
542  digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
543  }
545  // protect in case of empty event....
546  if (wordCounter) {
547  const int threadsPerBlockOrElementsPerThread =
548  cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
549  // fill it all
550  const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
551  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
552  assert(0 == wordCounter % 2);
553  // wordCounter is the total no of words in each event to be trasfered on device
554  auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
555  // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
556  // However, only the first half of elements end up eventually used:
557  // hence, here, only wordCounter/2 elements are allocated.
558  auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
559  alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
560  alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
561  // Launch rawToDigi kernel
562  if (debug) {
563  alpaka::exec<Acc1D>(queue,
564  workDiv,
566  cablingMap,
567  modToUnp,
568  wordCounter,
569  word_d.data(),
570  fedId_d.data(),
571  digis_d->view(),
572  digiErrors_d->view(),
573  useQualityInfo,
574  includeErrors);
575  } else {
576  alpaka::exec<Acc1D>(queue,
577  workDiv,
579  cablingMap,
580  modToUnp,
581  wordCounter,
582  word_d.data(),
583  fedId_d.data(),
584  digis_d->view(),
585  digiErrors_d->view(),
586  useQualityInfo,
587  includeErrors);
588  }
589 
590 #ifdef GPU_DEBUG
592  std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
593 #endif
594  }
595  // End of Raw2Digi and passing data for clustering
596 
597  {
598  // clusterizer
599  using namespace pixelClustering;
600  // calibrations
601  using namespace calibPixel;
602  const int threadsPerBlockOrElementsPerThread = []() {
603  if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
604  // NB: MPORTANT: This could be tuned to benefit from innermost loop.
605  return 32;
606  } else {
607  return 256;
608  }
609  }();
610  const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
611  threadsPerBlockOrElementsPerThread);
612  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
613 
614  if (debug) {
615  alpaka::exec<Acc1D>(queue,
616  workDiv,
618  clusterThresholds,
619  digis_d->view(),
620  clusters_d->view(),
621  gains,
622  wordCounter);
623  } else {
624  alpaka::exec<Acc1D>(queue,
625  workDiv,
627  clusterThresholds,
628  digis_d->view(),
629  clusters_d->view(),
630  gains,
631  wordCounter);
632  }
633 #ifdef GPU_DEBUG
635  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
636  << " threadsPerBlockOrElementsPerThread\n";
637 #endif
638 
639  alpaka::exec<Acc1D>(
640  queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
641 
642  auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
643  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
644 
645  const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
646  const auto workDivMaxNumModules =
647  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
648 #ifdef GPU_DEBUG
649  std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
650  << " threadsPerBlockOrElementsPerThread\n";
651 #endif
652  alpaka::exec<Acc1D>(
653  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
654 #ifdef GPU_DEBUG
656 #endif
657 
658  constexpr auto threadsPerBlockChargeCut = 256;
659  const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
660  // apply charge cut
661  alpaka::exec<Acc1D>(queue,
662  workDivChargeCut,
664  digis_d->view(),
665  clusters_d->view(),
666  clusterThresholds,
667  wordCounter);
668  // count the module start indices already here (instead of
669  // rechits) so that the number of clusters/hits can be made
670  // available in the rechit producer without additional points of
671  // synchronization/ExternalWork
672 
673  // MUST be ONE block
674  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
675  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
676 
677  // last element holds the number of all clusters
678  const auto clusModuleStartLastElement =
679  cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
681 
682  // element startBPIX2 hold the number of clusters until BPIX2
683  const auto bpix2ClusterStart =
684  cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
685  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
686  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
687 
688  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
689  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
690 
691 #ifdef GPU_DEBUG
693  std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
694  << " > no. of digis: " << nDigis << std::endl
695  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
696  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
697  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
698 #endif
699 
700  } // end clusterizer scope
701  }
702 
703  template <typename TrackerTraits>
705  Queue &queue,
706  const SiPixelClusterThresholds clusterThresholds,
707  SiPixelDigisSoAView &digis_view,
708  const uint32_t numDigis) {
709  using namespace pixelClustering;
710  using pixelTopology::Phase2;
711  nDigis = numDigis;
714  const auto threadsPerBlockOrElementsPerThread = 512;
715  const auto blocks =
716  cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
717  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
718 
719  alpaka::exec<Acc1D>(
720  queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
721 
722 #ifdef GPU_DEBUG
724  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
725  << " threadsPerBlockOrElementsPerThread\n";
726 #endif
727  alpaka::exec<Acc1D>(
728  queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
729 
730  auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
731  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
732 
733  const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
734  const auto workDivMaxNumModules =
735  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
736 #ifdef GPU_DEBUG
738  std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
739  << " threadsPerBlockOrElementsPerThread\n";
740 #endif
741  alpaka::exec<Acc1D>(
742  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
743 #ifdef GPU_DEBUG
745 #endif
746 
747  // apply charge cut
748  alpaka::exec<Acc1D>(queue,
749  workDivMaxNumModules,
751  digis_view,
752  clusters_d->view(),
753  clusterThresholds,
754  numDigis);
755 
756  // count the module start indices already here (instead of
757  // rechits) so that the number of clusters/hits can be made
758  // available in the rechit producer without additional points of
759  // synchronization/ExternalWork
760 
761  // MUST be ONE block
762  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
763  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
764 
765  // last element holds the number of all clusters
766  const auto clusModuleStartLastElement =
767  cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
769  // element startBPIX2 hold the number of clusters until BPIX2
770  const auto bpix2ClusterStart =
771  cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
772  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
773  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
774 
775  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
776  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
777 
778 #ifdef GPU_DEBUG
780  std::cout << "SiPixelPhase2DigiToCluster: results \n"
781  << " > no. of digis: " << numDigis << std::endl
782  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
783  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
784  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
785 #endif
786  } //
787 
791 
792  } // namespace pixelDetails
793 
794 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
def pack(high, low)
void makePhase1ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const SiPixelGainCalibrationForHLTSoAConstView &gains, const WordFedAppender &wordFed, const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, bool debug)
constexpr uint32_t moduleStartBit
constexpr uint32_t getRow(uint32_t ww)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
constexpr uint32_t ERROR_mask
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
constexpr uint32_t getCol(uint32_t ww)
constexpr auto MAX_LINK
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
constexpr uint32_t layerMask
constexpr uint32_t numRowsInRoc
constexpr uint32_t ROC_shift
static const int kSubdetOffset
Definition: DetId.h:22
assert(be >=bs)
std::enable_if_t< alpaka::isDevice< TDev > and not std::is_array_v< T >, device_view< TDev, T > > make_device_view(TDev const &device, T &data)
Definition: memory.h:260
static constexpr uint16_t numberOfModules
void makePhase2ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, SiPixelDigisSoAView &digis_view, const uint32_t numDigis)
constexpr uint16_t numberOfModules
ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap)
ALPAKA_ACCELERATOR_NAMESPACE::Queue Queue
Definition: LSTEvent.dev.cc:14
constexpr uint32_t panelMask
ALPAKA_FN_ACC void operator()(const TAcc &acc, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, SiPixelDigisSoAView digisView, SiPixelDigiErrorsSoAView err, bool useQualityInfo, bool includeErrors) const
constexpr uint32_t getPxId(uint32_t ww)
uint64_t word
static constexpr uint32_t const * layerStart
ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local)
constexpr int startBPIX2
static const int kSubdetMask
Definition: DetId.h:20
ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid)
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
constexpr uint32_t getDCol(uint32_t ww)
constexpr uint32_t OMIT_ERR_shift
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigisHost, SiPixelDigisDevice< Device > > SiPixelDigisSoACollection
constexpr uint32_t getADC(uint32_t ww)
std::vector< Block > Blocks
Definition: Block.h:99
constexpr uint32_t getLink(uint32_t ww)
constexpr unsigned int MAX_ROC
constexpr uint32_t ROC_mask
constexpr uint32_t moduleMask
constexpr uint32_t maxROCIndex
constexpr uint16_t invalidModuleId
ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, uint8_t fed, uint32_t link, uint32_t roc)
constexpr uint32_t getROC(uint32_t ww)
constexpr auto MAX_ROC
constexpr float gains[NGAINS]
Definition: EcalConstants.h:20
ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap)
#define debug
Definition: HDRShower.cc:19
constexpr uint32_t LINK_shift
constexpr uint32_t panelStartBit
ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol)
constexpr uint32_t layerStartBit
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
ALPAKA_FN_ACC constexpr bool once_per_grid(TAcc const &acc)
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
static constexpr uint32_t layerStart[numberOfLayers+1]
constexpr uint32_t OMIT_ERR_mask
col
Definition: cuy.py:1009
ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status)
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigiErrorsHost, SiPixelDigiErrorsDevice< Device > > SiPixelDigiErrorsSoACollection
constexpr uint32_t numColsInRoc
std::enable_if_t< not std::is_array_v< T >, host_view< T > > make_host_view(T &data)
Definition: memory.h:153
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const
constexpr uint32_t LINK_mask
constexpr unsigned int MAX_LINK
constexpr uint16_t invalidModuleId
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelClustersHost, SiPixelClustersDevice< Device > > SiPixelClustersSoACollection
constexpr uint32_t maxHitsInModule()