CMS 3D CMS Logo

SiPixelRawToClusterKernel.dev.cc
Go to the documentation of this file.
1 // C++ includes
2 #include <algorithm>
3 #include <cassert>
4 #include <chrono>
5 #include <cstdio>
6 #include <cstdlib>
7 #include <cstring>
8 #include <fstream>
9 #include <iomanip>
10 #include <iostream>
11 #include <string>
12 #include <utility>
13 
14 // CMSSW includes
21 
25 
26 // local includes
27 #include "CalibPixel.h"
28 #include "ClusterChargeCut.h"
29 #include "PixelClustering.h"
31 
32 // #define GPU_DEBUG
33 
35  namespace pixelDetails {
36 
38 
39  ALPAKA_FN_ACC uint32_t getLink(uint32_t ww) {
41  }
42 
43  ALPAKA_FN_ACC uint32_t getRoc(uint32_t ww) {
45  }
46 
47  ALPAKA_FN_ACC uint32_t getADC(uint32_t ww) {
49  }
50 
51  ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); }
52 
53  ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
54  uint8_t fed,
55  uint32_t link,
56  uint32_t roc) {
57  using namespace ::pixelDetails;
58  uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
60  cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
61  return detId;
62  }
63 
64  //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
65  //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
66  // Convert local pixel to pixelDetails::global pixel
67  ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
68  bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
69  int slopeRow = 0, slopeCol = 0;
70  int rowOffset = 0, colOffset = 0;
71 
72  if (bpix) {
73  if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
74  if (rocIdInDetUnit < 8) {
75  slopeRow = 1;
76  slopeCol = -1;
77  rowOffset = 0;
78  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
79  } else {
80  slopeRow = -1;
81  slopeCol = 1;
82  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
83  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
84  } // if roc
85  } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
86  if (rocIdInDetUnit < 8) {
87  slopeRow = -1;
88  slopeCol = 1;
89  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
90  colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
91  } else {
92  slopeRow = 1;
93  slopeCol = -1;
94  rowOffset = 0;
95  colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
96  }
97  }
98 
99  } else { // fpix
100  if (side == -1) { // pannel 1
101  if (rocIdInDetUnit < 8) {
102  slopeRow = 1;
103  slopeCol = -1;
104  rowOffset = 0;
105  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
106  } else {
107  slopeRow = -1;
108  slopeCol = 1;
109  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
110  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
111  }
112  } else { // pannel 2
113  if (rocIdInDetUnit < 8) {
114  slopeRow = 1;
115  slopeCol = -1;
116  rowOffset = 0;
117  colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
118  } else {
119  slopeRow = -1;
120  slopeCol = 1;
121  rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
122  colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
123  }
124 
125  } // side
126  }
127 
128  uint32_t gRow = rowOffset + slopeRow * local.row;
129  uint32_t gCol = colOffset + slopeCol * local.col;
130  ::pixelDetails::Pixel global = {gRow, gCol};
131  return global;
132  }
133 
134  ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) {
135  uint8_t errorType = 0;
136 
137  switch (status) {
138  case 1: {
139  if (debug)
140  printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
141  errorType = 35;
142  break;
143  }
144  case 2: {
145  if (debug)
146  printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
147  errorType = 36;
148  break;
149  }
150  case 3: {
151  if (debug)
152  printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
153  errorType = 37;
154  break;
155  }
156  case 4: {
157  if (debug)
158  printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
159  errorType = 38;
160  break;
161  }
162  default:
163  if (debug)
164  printf("Cabling check returned unexpected result, status = %i\n", status);
165  };
166 
167  return errorType;
168  }
169 
170  ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
171  uint32_t numRowsInRoc = 80;
172  uint32_t numColsInRoc = 52;
173 
175  return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc));
176  }
177 
178  ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
179 
180  ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord,
181  uint8_t fedId,
182  uint32_t link,
183  const SiPixelMappingSoAConstView &cablingMap,
184  bool debug = false) {
185  uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
186  if (errorType < 25)
187  return 0;
188  bool errorFound = false;
189 
190  switch (errorType) {
191  case (25): {
192  errorFound = true;
193  uint32_t index =
195  if (index > 1 && index <= cablingMap.size()) {
196  if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
197  errorFound = false;
198  }
199  if (debug and errorFound)
200  printf("Invalid ROC = 25 found (errorType = 25)\n");
201  break;
202  }
203  case (26): {
204  if (debug)
205  printf("Gap word found (errorType = 26)\n");
206  errorFound = true;
207  break;
208  }
209  case (27): {
210  if (debug)
211  printf("Dummy word found (errorType = 27)\n");
212  errorFound = true;
213  break;
214  }
215  case (28): {
216  if (debug)
217  printf("Error fifo nearly full (errorType = 28)\n");
218  errorFound = true;
219  break;
220  }
221  case (29): {
222  if (debug)
223  printf("Timeout on a channel (errorType = 29)\n");
225  if (debug)
226  printf("...first errorType=29 error, this gets masked out\n");
227  }
228  errorFound = true;
229  break;
230  }
231  case (30): {
232  if (debug)
233  printf("TBM error trailer (errorType = 30)\n");
234  int StateMatch_bits = 4;
235  int StateMatch_shift = 8;
236  uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits);
237  int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask;
238  if (StateMatch != 1 && StateMatch != 8) {
239  if (debug)
240  printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
241  }
242  if (StateMatch == 1)
243  errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30
244  errorFound = true;
245  break;
246  }
247  case (31): {
248  if (debug)
249  printf("Event number error (errorType = 31)\n");
250  errorFound = true;
251  break;
252  }
253  default:
254  errorFound = false;
255  };
256 
257  return errorFound ? errorType : 0;
258  }
259 
260  ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId,
261  uint32_t errWord,
262  uint32_t errorType,
263  const SiPixelMappingSoAConstView &cablingMap,
264  bool debug = false) {
265  uint32_t rID = 0xffffffff;
266 
267  switch (errorType) {
268  case 25:
269  case 30:
270  case 31:
271  case 36:
272  case 40: {
273  uint32_t roc = 1;
274  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
275  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId;
276  if (rID_temp != 9999)
277  rID = rID_temp;
278  break;
279  }
280  case 29: {
281  int chanNmbr = 0;
282  const int DB0_shift = 0;
283  const int DB1_shift = DB0_shift + 1;
284  const int DB2_shift = DB1_shift + 1;
285  const int DB3_shift = DB2_shift + 1;
286  const int DB4_shift = DB3_shift + 1;
287  const uint32_t DataBit_mask = ~(~uint32_t(0) << 1);
288 
289  int CH1 = (errWord >> DB0_shift) & DataBit_mask;
290  int CH2 = (errWord >> DB1_shift) & DataBit_mask;
291  int CH3 = (errWord >> DB2_shift) & DataBit_mask;
292  int CH4 = (errWord >> DB3_shift) & DataBit_mask;
293  int CH5 = (errWord >> DB4_shift) & DataBit_mask;
294  int BLOCK_bits = 3;
295  int BLOCK_shift = 8;
296  uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits);
297  int BLOCK = (errWord >> BLOCK_shift) & BLOCK_mask;
298  int localCH = 1 * CH1 + 2 * CH2 + 3 * CH3 + 4 * CH4 + 5 * CH5;
299  if (BLOCK % 2 == 0)
300  chanNmbr = (BLOCK / 2) * 9 + localCH;
301  else
302  chanNmbr = ((BLOCK - 1) / 2) * 9 + 4 + localCH;
303  if ((chanNmbr < 1) || (chanNmbr > 36))
304  break; // signifies unexpected result
305 
306  uint32_t roc = 1;
307  uint32_t link = chanNmbr;
308  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId;
309  if (rID_temp != 9999)
310  rID = rID_temp;
311  break;
312  }
313  case 37:
314  case 38: {
315  uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
316  uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
317  uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId;
318  if (rID_temp != 9999)
319  rID = rID_temp;
320  break;
321  }
322  default:
323  break;
324  };
325 
326  return rID;
327  }
328 
329  // Kernel to perform Raw to Digi conversion
331  template <typename TAcc>
332  ALPAKA_FN_ACC void operator()(const TAcc &acc,
333  const SiPixelMappingSoAConstView &cablingMap,
334  const unsigned char *modToUnp,
335  const uint32_t wordCounter,
336  const uint32_t *word,
337  const uint8_t *fedIds,
338  SiPixelDigisSoAView digisView,
340  bool useQualityInfo,
341  bool includeErrors,
342  bool debug) const {
343  cms::alpakatools::for_each_element_in_grid_strided(acc, wordCounter, [&](uint32_t iloop) {
344  auto gIndex = iloop;
345  auto dvgi = digisView[gIndex];
346  dvgi.xx() = 0;
347  dvgi.yy() = 0;
348  dvgi.adc() = 0;
349  bool skipROC = false;
350 
351  if (gIndex == 0)
352  err[gIndex].size() = 0;
353 
354  err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
355 
356  uint8_t fedId = fedIds[gIndex / 2]; // +1200;
357 
358  // initialize (too many coninue below)
359  dvgi.pdigi() = 0;
360  dvgi.rawIdArr() = 0;
362  dvgi.moduleId() = invalidModuleId;
363 
364  uint32_t ww = word[gIndex]; // Array containing 32 bit raw data
365  if (ww == 0) {
366  // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
367  return;
368  }
369 
370  uint32_t link = getLink(ww); // Extract link
371  uint32_t roc = getRoc(ww); // Extract Roc in link
372  ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
373 
374  uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug);
375  skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
376  if (includeErrors and skipROC) {
377  uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug);
378  err[gIndex].pixelErrors() = SiPixelErrorCompact{rID, ww, errorType, fedId};
379  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{});
380  return;
381  }
382 
383  uint32_t rawId = detId.RawId;
384  uint32_t rocIdInDetUnit = detId.rocInDet;
385  bool barrel = isBarrel(rawId);
386 
387  uint32_t index =
389  if (useQualityInfo) {
390  skipROC = cablingMap.badRocs()[index];
391  if (skipROC)
392  return;
393  }
394  skipROC = modToUnp[index];
395  if (skipROC)
396  return;
397 
398  uint32_t layer = 0; //, ladder =0;
399  int side = 0, panel = 0, module = 0; //disk = 0, blade = 0
400 
401  if (barrel) {
404  side = (module < 5) ? -1 : 1;
405  } else {
406  // endcap ids
407  layer = 0;
409  //disk = (rawId >> diskStartBit_) & diskMask_;
410  side = (panel == 1) ? -1 : 1;
411  //blade = (rawId >> bladeStartBit_) & bladeMask_;
412  }
413 
414  // ***special case of layer to 1 be handled here
415  ::pixelDetails::Pixel localPix;
416  if (layer == 1) {
418  uint32_t row = (ww >> ::pixelDetails::ROW_shift) & ::pixelDetails::ROW_mask;
419  localPix.row = row;
420  localPix.col = col;
421  if (includeErrors) {
422  if (not rocRowColIsValid(row, col)) {
423  uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays
424  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
425  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{});
426  if (debug)
427  printf("BPIX1 Error status: %i\n", error);
428  return;
429  }
430  }
431  } else {
432  // ***conversion rules for dcol and pxid
433  uint32_t dcol = (ww >> ::pixelDetails::DCOL_shift) & ::pixelDetails::DCOL_mask;
434  uint32_t pxid = (ww >> ::pixelDetails::PXID_shift) & ::pixelDetails::PXID_mask;
435  uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
436  uint32_t col = dcol * 2 + pxid % 2;
437  localPix.row = row;
438  localPix.col = col;
439  if (includeErrors and not dcolIsValid(dcol, pxid)) {
440  uint8_t error = conversionError(fedId, 3, debug);
441  err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
442  alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{});
443  if (debug)
444  printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
445  return;
446  }
447  }
448 
449  ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix);
450  dvgi.xx() = globalPix.row; // origin shifting by 1 0-159
451  dvgi.yy() = globalPix.col; // origin shifting by 1 0-415
452  dvgi.adc() = getADC(ww);
453  dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
454  dvgi.moduleId() = detId.moduleId;
455  dvgi.rawIdArr() = rawId;
456  }); // end of stride on grid
457 
458  } // end of Raw to Digi kernel operator()
459  }; // end of Raw to Digi struct
460 
461  template <typename TrackerTraits>
463  template <typename TAcc>
464  ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
465  ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024
466 
469 
470 #ifndef NDEBUG
471  [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
472  ALPAKA_ASSERT_OFFLOAD(0 == blockIdxLocal);
473  [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
475 #endif
476 
477  // limit to maxHitsInModule;
479  clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
480  });
481 
483  constexpr auto leftModules = isPhase2 ? 1024 : nMaxModules - 1024;
484 
485  auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
486 
488  acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
489 
491  acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
492 
493  if constexpr (isPhase2) {
495  acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
497  clus_view.clusModuleStart() + 3072 + 1,
498  clus_view.clusModuleStart() + 3072 + 1,
499  nMaxModules - 3072,
500  ws);
501  }
502 
503  constexpr auto lastModule = isPhase2 ? 2049u : nMaxModules + 1;
504  cms::alpakatools::for_each_element_in_block_strided(acc, lastModule, 1025u, [&](uint32_t i) {
505  clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
506  });
507  alpaka::syncBlockThreads(acc);
508 
509  if constexpr (isPhase2) {
510  cms::alpakatools::for_each_element_in_block_strided(acc, 3073u, 2049u, [&](uint32_t i) {
511  clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
512  });
513  alpaka::syncBlockThreads(acc);
514 
515  cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, 3073u, [&](uint32_t i) {
516  clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
517  });
518  alpaka::syncBlockThreads(acc);
519  }
520 #ifdef GPU_DEBUG
521  ALPAKA_ASSERT_OFFLOAD(0 == clus_view[0].moduleStart());
522  auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart());
523  ALPAKA_ASSERT_OFFLOAD(c0 == clus_view[1].moduleStart());
524  ALPAKA_ASSERT_OFFLOAD(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
525  ALPAKA_ASSERT_OFFLOAD(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
526  ALPAKA_ASSERT_OFFLOAD(clus_view[nMaxModules].moduleStart() >= clus_view[1025].moduleStart());
527 
529  if (0 != i)
530  ALPAKA_ASSERT_OFFLOAD(clus_view[i].moduleStart() >= clus_view[i - i].moduleStart());
531  // Check BPX2 (1), FP1 (4)
532  constexpr auto bpix2 = TrackerTraits::layerStart[1];
533  constexpr auto fpix1 = TrackerTraits::layerStart[4];
534  if (i == bpix2 || i == fpix1)
535  printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
536  });
537 #endif
538  // avoid overflow
541  if (clus_view[i].clusModuleStart() > MAX_HITS)
542  clus_view[i].clusModuleStart() = MAX_HITS;
543  });
544 
545  } // end of FillHitsModuleStart kernel operator()
546  }; // end of FillHitsModuleStart struct
547 
548  // Interface to outside
549  template <typename TrackerTraits>
551  Queue &queue,
552  const SiPixelClusterThresholds clusterThresholds,
553  const SiPixelMappingSoAConstView &cablingMap,
554  const unsigned char *modToUnp,
556  const WordFedAppender &wordFed,
557  const uint32_t wordCounter,
558  const uint32_t fedCounter,
559  bool useQualityInfo,
560  bool includeErrors,
561  bool debug) {
562  nDigis = wordCounter;
563 
564 #ifdef GPU_DEBUG
565  std::cout << "decoding " << wordCounter << " digis." << std::endl;
566 #endif
568  digis_d = SiPixelDigisSoACollection(wordCounter, queue);
569  if (includeErrors) {
570  digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
571  }
573  // protect in case of empty event....
574  if (wordCounter) {
575  const int threadsPerBlockOrElementsPerThread =
576  cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
577  // fill it all
578  const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
579  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
580  assert(0 == wordCounter % 2);
581  // wordCounter is the total no of words in each event to be trasfered on device
582  auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
583  // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
584  // However, only the first half of elements end up eventually used:
585  // hence, here, only wordCounter/2 elements are allocated.
586  auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
587  alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
588  alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
589  // Launch rawToDigi kernel
590  alpaka::exec<Acc1D>(queue,
591  workDiv,
593  cablingMap,
594  modToUnp,
595  wordCounter,
596  word_d.data(),
597  fedId_d.data(),
598  digis_d->view(),
599  digiErrors_d->view(),
600  useQualityInfo,
602  debug);
603 
604 #ifdef GPU_DEBUG
606  std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
607 #endif
608  }
609  // End of Raw2Digi and passing data for clustering
610 
611  {
612  // clusterizer
613  using namespace pixelClustering;
614  // calibrations
615  using namespace calibPixel;
616  const int threadsPerBlockOrElementsPerThread = []() {
617  if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
618  // NB: MPORTANT: This could be tuned to benefit from innermost loop.
619  return 32;
620  } else {
621  return 256;
622  }
623  }();
624  const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
625  threadsPerBlockOrElementsPerThread);
626  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
627 
628  alpaka::exec<Acc1D>(
629  queue, workDiv, CalibDigis{}, clusterThresholds, digis_d->view(), clusters_d->view(), gains, wordCounter);
630 
631 #ifdef GPU_DEBUG
633  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
634  << " threadsPerBlockOrElementsPerThread\n";
635 #endif
636 
637  alpaka::exec<Acc1D>(
638  queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
639 
640  auto moduleStartFirstElement =
641  cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
642  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
643 
644  // TODO
645  // - we are fixing this here since it needs to be needed
646  // at compile time also in the kernel (for_each_element_in_block_strided)
647  // - put maxIter in the Geometry traits
648  constexpr auto threadsOrElementsFindClus = 256;
649 
650  const auto workDivMaxNumModules =
651  cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsOrElementsFindClus);
652  // NB: With present FindClus() / chargeCut() algorithm,
653  // threadPerBlock (GPU) or elementsPerThread (CPU) = 256 show optimal performance.
654  // Though, it does not have to be the same number for CPU/GPU cases.
655 
656 #ifdef GPU_DEBUG
657  std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsOrElementsFindClus
658  << " threadsPerBlockOrElementsPerThread\n";
659 #endif
660 
661  alpaka::exec<Acc1D>(
662  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
663 
664 #ifdef GPU_DEBUG
666 #endif
667 
668  constexpr auto threadsPerBlockChargeCut = 256;
669  const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
670  // apply charge cut
671  alpaka::exec<Acc1D>(queue,
672  workDivChargeCut,
674  digis_d->view(),
675  clusters_d->view(),
676  clusterThresholds,
677  wordCounter);
678  // count the module start indices already here (instead of
679  // rechits) so that the number of clusters/hits can be made
680  // available in the rechit producer without additional points of
681  // synchronization/ExternalWork
682 
683  // MUST be ONE block
684  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
685  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
686 
687  // last element holds the number of all clusters
688  const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
689  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
691 
692  // element startBPIX2 hold the number of clusters until BPIX2
693  const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
694  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
695  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
696  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
697 
698  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
699  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
700 
701 #ifdef GPU_DEBUG
703  std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
704  << " > no. of digis: " << nDigis << std::endl
705  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
706  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
707  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
708 #endif
709 
710  } // end clusterizer scope
711  }
712 
713  template <typename TrackerTraits>
715  Queue &queue,
716  const SiPixelClusterThresholds clusterThresholds,
717  SiPixelDigisSoAView &digis_view,
718  const uint32_t numDigis) {
719  using namespace pixelClustering;
720  using pixelTopology::Phase2;
721  nDigis = numDigis;
724  const auto threadsPerBlockOrElementsPerThread = 512;
725  const auto blocks =
726  cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
727  const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
728 
729  alpaka::exec<Acc1D>(
730  queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
731 
732 #ifdef GPU_DEBUG
734  std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
735  << " threadsPerBlockOrElementsPerThread\n";
736 #endif
737  alpaka::exec<Acc1D>(
738  queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
739 
740  auto moduleStartFirstElement =
741  cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
742  alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
743 
745 
746  const auto threadsPerBlockFindClus = 256;
747  const auto workDivMaxNumModules = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockFindClus);
748 
749 #ifdef GPU_DEBUG
751  std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus
752  << " threadsPerBlockOrElementsPerThread\n";
753 #endif
754  alpaka::exec<Acc1D>(
755  queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
756 #ifdef GPU_DEBUG
758 #endif
759 
760  // apply charge cut
761  alpaka::exec<Acc1D>(queue,
762  workDivMaxNumModules,
764  digis_view,
765  clusters_d->view(),
766  clusterThresholds,
767  numDigis);
768 
769  // count the module start indices already here (instead of
770  // rechits) so that the number of clusters/hits can be made
771  // available in the rechit producer without additional points of
772  // synchronization/ExternalWork
773 
774  // MUST be ONE block
775  const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
776  alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
777 
778  // last element holds the number of all clusters
779  const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
780  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
782  // element startBPIX2 hold the number of clusters until BPIX2
783  const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
784  alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
785  auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
786  alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
787 
788  auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
789  alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
790 
791 #ifdef GPU_DEBUG
793  std::cout << "SiPixelPhase2DigiToCluster: results \n"
794  << " > no. of digis: " << numDigis << std::endl
795  << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
796  << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
797  << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
798 #endif
799  } //
800 
804 
805  } // namespace pixelDetails
806 
807 } // namespace ALPAKA_ACCELERATOR_NAMESPACE
const uint32_t gridDimension(alpaka::getWorkDiv< alpaka::Grid, alpaka::Blocks >(acc)[0u])
def pack(high, low)
void makePhase1ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const SiPixelGainCalibrationForHLTSoAConstView &gains, const WordFedAppender &wordFed, const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, bool debug)
constexpr uint32_t moduleStartBit
constexpr int nMaxModules
ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap, bool debug=false)
constexpr uint32_t DCOL_shift
constexpr uint32_t DCOL_mask
constexpr uint32_t PXID_shift
constexpr uint32_t ERROR_mask
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:19
constexpr uint32_t layerMask
constexpr uint32_t numRowsInRoc
constexpr uint32_t ROC_shift
ALPAKA_FN_ACC void operator()(const TAcc &acc, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, SiPixelDigisSoAView digisView, SiPixelDigiErrorsSoAView err, bool useQualityInfo, bool includeErrors, bool debug) const
constexpr uint32_t ADC_shift
std::enable_if_t< not std::is_array_v< T >, device_view< TDev, T > > make_device_view(TDev const &device, T &data)
Definition: memory.h:252
assert(be >=bs)
static constexpr uint16_t numberOfModules
void makePhase2ClustersAsync(Queue &queue, const SiPixelClusterThresholds clusterThresholds, SiPixelDigisSoAView &digis_view, const uint32_t numDigis)
constexpr uint16_t numberOfModules
constexpr uint32_t panelMask
constexpr uint32_t OMIT_ERR_shift
uint64_t word
static constexpr uint32_t const * layerStart
ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local)
ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc &acc, const Idx maxNumberOfElements, const Idx elementIdxShift, const Func func, const unsigned int dimIndex=0)
Definition: workdivision.h:936
constexpr int startBPIX2
constexpr uint32_t OMIT_ERR_mask
ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid)
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc &acc, const Idx maxNumberOfElements, const Idx elementIdxShift, const Func func, const unsigned int dimIndex=0)
Definition: workdivision.h:975
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigisHost, SiPixelDigisDevice< Device > > SiPixelDigisSoACollection
constexpr unsigned int MAX_ROC
constexpr uint32_t ROC_mask
constexpr uint32_t moduleMask
constexpr uint32_t maxROCIndex
constexpr uint16_t invalidModuleId
ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, uint8_t fed, uint32_t link, uint32_t roc)
constexpr auto MAX_ROC
constexpr float gains[NGAINS]
Definition: EcalConstants.h:20
#define debug
Definition: HDRShower.cc:19
constexpr uint32_t LINK_shift
constexpr uint32_t panelStartBit
ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol)
constexpr uint32_t LINK_mask
constexpr uint32_t layerStartBit
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
static constexpr uint32_t layerStart[numberOfLayers+1]
ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug=false)
col
Definition: cuy.py:1009
constexpr uint32_t COL_mask
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelDigiErrorsHost, SiPixelDigiErrorsDevice< Device > > SiPixelDigiErrorsSoACollection
constexpr uint32_t numColsInRoc
constexpr uint32_t ADC_mask
constexpr uint32_t ROC_mask
std::enable_if_t< not std::is_array_v< T >, host_view< T > > make_host_view(T &data)
Definition: memory.h:145
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const
constexpr uint32_t ROW_mask
constexpr uint32_t LINK_mask
ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap, bool debug=false)
constexpr unsigned int MAX_LINK
constexpr uint32_t PXID_mask
std::conditional_t< std::is_same_v< Device, alpaka::DevCpu >, SiPixelClustersHost, SiPixelClustersDevice< Device > > SiPixelClustersSoACollection
constexpr uint32_t maxHitsInModule()