UnpackPortable.dev.cc — alpaka-based ECAL raw-data unpacking kernel for CMSSW
(extracted from the generated documentation page for this file).
1 #include <alpaka/alpaka.hpp>
2 
8 
9 #include "UnpackPortable.h"
10 
12 
13  using namespace ::ecal::raw;
14  using namespace cms::alpakatools;
15 
16  class Kernel_unpack {
17  public:
18  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
19  ALPAKA_FN_ACC void operator()(TAcc const& acc,
20  unsigned char const* __restrict__ data,
21  uint32_t const* __restrict__ offsets,
22  int const* __restrict__ feds,
25  EcalElectronicsMappingDevice::ConstView eid2did,
26  uint32_t const nbytesTotal) const {
27  constexpr auto kSampleSize = ecalPh1::sampleSize;
28 
29  // indices
30  auto const ifed = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
31  auto const threadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
32 
33  // offset in bytes
34  auto const offset = offsets[ifed];
35  // fed id
36  auto const fed = feds[ifed];
37  auto const isBarrel = is_barrel(static_cast<uint8_t>(fed - 600));
38  // size
39  auto const gridDim = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u];
40  auto const size = ifed == gridDim - 1 ? nbytesTotal - offset : offsets[ifed + 1] - offset;
41  auto* samples = isBarrel ? digisDevEB.data()->data() : digisDevEE.data()->data();
42  auto* ids = isBarrel ? digisDevEB.id() : digisDevEE.id();
43  auto* pChannelsCounter = isBarrel ? &digisDevEB.size() : &digisDevEE.size();
44 
45  // offset to the right raw buffer
46  uint64_t const* buffer = reinterpret_cast<uint64_t const*>(data + offset);
47 
48  // dump first 3 bits for each 64-bit word
49  //print_first3bits(buffer, size / 8);
50 
51  //
52  // fed header
53  //
54  auto const fed_header = buffer[0];
55  uint32_t bx = (fed_header >> H_BX_B) & H_BX_MASK;
56  uint32_t lv1 = (fed_header >> H_L1_B) & H_L1_MASK;
57  uint32_t triggerType = (fed_header >> H_TTYPE_B) & H_TTYPE_MASK;
58 
59  // determine the number of FE channels from the trigger type
60  uint32_t numbChannels(0);
61  if (triggerType == PHYSICTRIGGER) {
62  numbChannels = NUMB_FE;
63  } else if (triggerType == CALIBRATIONTRIGGER) {
64  numbChannels = NUMB_FE + 2; // FE + 2 MEM blocks
65  } else {
66  // unsupported trigger type
67  return;
68  }
69 
70  // 9 for fed + dcc header
71  // 36 for 4 EE TCC blocks or 18 for 1 EB TCC block
72  // 6 for SR block size
73 
74  // dcc header w2
75  auto const w2 = buffer[2];
76  uint8_t const fov = (w2 >> H_FOV_B) & H_FOV_MASK;
77 
78  // make a list of channels with data from DCC header channels status
79  // this could be done for each block instead of each thread since it defined per FED
80  uint8_t exp_ttids[NUMB_FE + 2]; // FE + 2 MEM blocks
81  uint8_t ch = 1;
82  uint8_t nCh = 0;
83  for (uint8_t i = 4; i < 9; ++i) { // data words with channel status info
84  for (uint8_t j = 0; j < 14; ++j, ++ch) { // channel status fields in one data word
85  const uint8_t shift = j * 4; //each channel has 4 bits
86  const int chStatus = (buffer[i] >> shift) & H_CHSTATUS_MASK;
87  const bool regular = (chStatus == CH_DISABLED || chStatus == CH_SUPPRESS);
88  const bool problematic =
89  (chStatus == CH_TIMEOUT || chStatus == CH_HEADERERR || chStatus == CH_LINKERR ||
90  chStatus == CH_LENGTHERR || chStatus == CH_IFIFOFULL || chStatus == CH_L1AIFIFOFULL);
91  if (!(regular || problematic)) {
92  exp_ttids[nCh] = ch;
93  ++nCh;
94  }
95  }
96  }
97 
98  //
99  // print Tower block headers
100  //
101  uint8_t ntccblockwords = isBarrel ? 18 : 36;
102  auto const* tower_blocks_start = buffer + 9 + ntccblockwords + 6;
103  auto const* trailer = buffer + (size / 8 - 1);
104  auto const* current_tower_block = tower_blocks_start;
105  uint8_t iCh = 0;
106  uint8_t next_tower_id = exp_ttids[iCh];
107  while (current_tower_block < trailer && iCh < numbChannels) {
108  auto const w = *current_tower_block;
109  uint8_t ttid = w & TOWER_ID_MASK;
110  uint16_t bxlocal = (w >> TOWER_BX_B) & TOWER_BX_MASK;
111  uint16_t lv1local = (w >> TOWER_L1_B) & TOWER_L1_MASK;
112  uint16_t block_length = (w >> TOWER_LENGTH_B) & TOWER_LENGTH_MASK;
113 
114  // fast forward to the next good tower id (in case of recovery from an earlier header corruption)
115  while (exp_ttids[iCh] < next_tower_id) {
116  ++iCh;
117  }
118  ++iCh;
119 
120  // check if the tower id in the tower header is the one expected
121  // if not try to find the next good header, point the current_tower_block to it, and extract its tower id
122  // or break if there is none
123  if (ttid != next_tower_id) {
124  next_tower_id = find_next_tower_block(current_tower_block, trailer, bx, lv1);
125  if (next_tower_id < TOWER_ID_MASK) {
126  continue;
127  } else {
128  break;
129  }
130  }
131 
132  // prepare for the next iteration
133  next_tower_id = exp_ttids[iCh];
134 
135  uint16_t const dccbx = bx & 0xfff;
136  uint16_t const dccl1 = lv1 & 0xfff;
137  // fov>=1 is required to support simulated data for which bx==bxlocal==0
138  if (fov >= 1 && !is_synced_towerblock(dccbx, bxlocal, dccl1, lv1local)) {
139  current_tower_block += block_length;
140  continue;
141  }
142 
143  // go through all the channels
144  // get the next channel coordinates
145  uint32_t const nchannels = (block_length - 1) / 3;
146 
147  bool bad_block = false;
148  auto& ch_with_bad_block = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
149  if (once_per_block(acc)) {
150  ch_with_bad_block = std::numeric_limits<uint32_t>::max();
151  }
152  // make sure the shared memory is initialised for all threads
153  alpaka::syncBlockThreads(acc);
154 
155  auto const threadsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
156  // 1 threads per channel in this block
157  // All threads enter the loop regardless if they will treat channel indices channel >= nchannels.
158  // The threads with excess indices perform no operations but also reach the syncBlockThreads() inside the loop.
159  for (uint32_t i = 0; i < nchannels; i += threadsPerBlock) {
160  auto const channel = i + threadIdx;
161 
162  uint64_t wdata;
163  uint8_t stripid;
164  uint8_t xtalid;
165 
166  // threads must be inside the range (no break here because of syncBlockThreads() afterwards)
167  if (channel < nchannels && channel < ch_with_bad_block) {
168  // inc the channel's counter and get the pos where to store
169  wdata = current_tower_block[1 + channel * 3];
170  stripid = wdata & 0x7;
171  xtalid = (wdata >> 4) & 0x7;
172 
173  // check if the stripid and xtalid are in the allowed range and if not skip the rest of the block
174  if (stripid < ElectronicsIdGPU::MIN_STRIPID || stripid > ElectronicsIdGPU::MAX_STRIPID ||
175  xtalid < ElectronicsIdGPU::MIN_XTALID || xtalid > ElectronicsIdGPU::MAX_XTALID) {
176  bad_block = true;
177  }
178  if (channel > 0) {
179  // check if the stripid has increased or that the xtalid has increased from the previous data word. If not something is wrong and the rest of the block is skipped.
180  auto const prev_channel = channel - 1;
181  auto const prevwdata = current_tower_block[1 + prev_channel * 3];
182  uint8_t const laststripid = prevwdata & 0x7;
183  uint8_t const lastxtalid = (prevwdata >> 4) & 0x7;
184  if ((stripid == laststripid && xtalid <= lastxtalid) || (stripid < laststripid)) {
185  bad_block = true;
186  }
187  }
188  }
189 
190  // check if this thread has the lowest bad block
191  if (bad_block && channel < ch_with_bad_block) {
192  alpaka::atomicMin(acc, &ch_with_bad_block, channel, alpaka::hierarchy::Threads{});
193  }
194 
195  // make sure that all threads that have to have set the ch_with_bad_block shared memory
196  alpaka::syncBlockThreads(acc);
197 
198  // threads outside of the range or bad block detected in this thread or one working on a lower block -> stop this loop iteration here
199  if (channel >= nchannels || channel >= ch_with_bad_block) {
200  continue;
201  }
202 
203  ElectronicsIdGPU eid{fed2dcc(fed), ttid, stripid, xtalid};
204  auto const didraw = isBarrel ? compute_ebdetid(eid) : eid2did[eid.linearIndex()].rawid();
205  // skip channels with an invalid detid
206  if (didraw == 0)
207  continue;
208 
209  // get samples
210  uint16_t sampleValues[kSampleSize];
211  sampleValues[0] = (wdata >> 16) & 0x3fff;
212  sampleValues[1] = (wdata >> 32) & 0x3fff;
213  sampleValues[2] = (wdata >> 48) & 0x3fff;
214  auto const wdata1 = current_tower_block[2 + channel * 3];
215  sampleValues[3] = wdata1 & 0x3fff;
216  sampleValues[4] = (wdata1 >> 16) & 0x3fff;
217  sampleValues[5] = (wdata1 >> 32) & 0x3fff;
218  sampleValues[6] = (wdata1 >> 48) & 0x3fff;
219  auto const wdata2 = current_tower_block[3 + channel * 3];
220  sampleValues[7] = wdata2 & 0x3fff;
221  sampleValues[8] = (wdata2 >> 16) & 0x3fff;
222  sampleValues[9] = (wdata2 >> 32) & 0x3fff;
223 
224  // check gain
225  bool isSaturation = true;
226  short firstGainZeroSampID{-1}, firstGainZeroSampADC{-1};
227  for (uint32_t si = 0; si < kSampleSize; ++si) {
228  if (gainId(sampleValues[si]) == 0) {
229  firstGainZeroSampID = si;
230  firstGainZeroSampADC = adc(sampleValues[si]);
231  break;
232  }
233  }
234  if (firstGainZeroSampID != -1) {
235  unsigned int plateauEnd = std::min(kSampleSize, (unsigned int)(firstGainZeroSampID + 5));
236  for (unsigned int s = firstGainZeroSampID; s < plateauEnd; s++) {
237  if (!(gainId(sampleValues[s]) == 0 && adc(sampleValues[s]) == firstGainZeroSampADC)) {
238  isSaturation = false;
239  break;
240  } //it's not saturation
241  }
242  // get rid of channels which are stuck in gain0
243  if (firstGainZeroSampID < 3) {
244  isSaturation = false;
245  }
246  if (!isSaturation)
247  continue;
248  } else { // there is no zero gainId sample
249  // gain switch check
250  short numGain = 1;
251  bool gainSwitchError = false;
252  for (unsigned int si = 1; si < kSampleSize; ++si) {
253  if ((gainId(sampleValues[si - 1]) > gainId(sampleValues[si])) && numGain < 5)
254  gainSwitchError = true;
255  if (gainId(sampleValues[si - 1]) == gainId(sampleValues[si]))
256  numGain++;
257  else
258  numGain = 1;
259  }
260  if (gainSwitchError)
261  continue;
262  }
263 
264  auto const pos = alpaka::atomicAdd(acc, pChannelsCounter, 1u, alpaka::hierarchy::Threads{});
265 
266  // store to global
267  ids[pos] = didraw;
268  std::memcpy(&samples[pos * kSampleSize], sampleValues, kSampleSize * sizeof(uint16_t));
269  }
270 
271  current_tower_block += block_length;
272  }
273  }
274 
275  private:
276  ALPAKA_FN_INLINE ALPAKA_FN_ACC void print_raw_buffer(uint8_t const* const buffer,
277  uint32_t const nbytes,
278  uint32_t const nbytes_per_row = 20) const {
279  for (uint32_t i = 0; i < nbytes; ++i) {
280  if (i % nbytes_per_row == 0 && i > 0)
281  printf("\n");
282  printf("%02X ", buffer[i]);
283  }
284  }
285 
286  ALPAKA_FN_INLINE ALPAKA_FN_ACC void print_first3bits(uint64_t const* buffer, uint32_t size) const {
287  for (uint32_t i = 0; i < size; ++i) {
288  uint8_t const b61 = (buffer[i] >> 61) & 0x1;
289  uint8_t const b62 = (buffer[i] >> 62) & 0x1;
290  uint8_t const b63 = (buffer[i] >> 63) & 0x1;
291  printf("[word: %u] %u%u%u\n", i, b63, b62, b61);
292  }
293  }
294 
295  ALPAKA_FN_INLINE ALPAKA_FN_ACC bool is_barrel(uint8_t dccid) const {
296  return dccid >= ElectronicsIdGPU::MIN_DCCID_EBM && dccid <= ElectronicsIdGPU::MAX_DCCID_EBP;
297  }
298 
299  ALPAKA_FN_INLINE ALPAKA_FN_ACC uint8_t fed2dcc(int fed) const { return static_cast<uint8_t>(fed - 600); }
300 
301  ALPAKA_FN_INLINE ALPAKA_FN_ACC int zside_for_eb(ElectronicsIdGPU const& eid) const {
302  int dcc = eid.dccId();
303  return ((dcc >= ElectronicsIdGPU::MIN_DCCID_EBM && dcc <= ElectronicsIdGPU::MAX_DCCID_EBM)) ? -1 : 1;
304  }
305 
306  ALPAKA_FN_INLINE ALPAKA_FN_ACC uint8_t find_next_tower_block(uint64_t const*& current_tower_block,
307  uint64_t const* trailer,
308  uint32_t const bx,
309  uint32_t const lv1) const {
310  const auto* next_tower_block = current_tower_block + 1; // move forward to skip the broken header
311 
312  // expected LV1, BX, #TS
313  const uint64_t lv1local = ((lv1 - 1) & TOWER_L1_MASK);
314  const uint64_t bxlocal = (bx != 3564) ? bx : 0;
315  // The CPU unpacker also checks the # time samples expected in the header
316  // but those are currently not available here
317 
318  // construct tower header and mask
319  const uint64_t sign = 0xC0000000C0000000 + (lv1local << TOWER_L1_B) + (bxlocal << TOWER_BX_B);
320  const uint64_t mask =
321  0xC0001000D0000000 + (uint64_t(TOWER_L1_MASK) << TOWER_L1_B) + (uint64_t(TOWER_BX_MASK) << TOWER_BX_B);
322 
323  while (next_tower_block < trailer) {
324  if ((*next_tower_block & mask) == sign) {
325  current_tower_block = next_tower_block;
326  return uint8_t(*next_tower_block & TOWER_ID_MASK);
327  } else {
328  ++next_tower_block;
329  }
330  }
331  return TOWER_ID_MASK; // return the maximum value
332  }
333 
334  ALPAKA_FN_INLINE ALPAKA_FN_ACC bool is_synced_towerblock(uint16_t const dccbx,
335  uint16_t const bx,
336  uint16_t const dccl1,
337  uint16_t const l1) const {
338  bool const bxsync = (bx == 0 && dccbx == 3564) || (bx == dccbx && dccbx != 3564);
339  bool const l1sync = (l1 == ((dccl1 - 1) & 0xfff));
340  return bxsync && l1sync;
341  }
342 
343  ALPAKA_FN_INLINE ALPAKA_FN_ACC bool right_tower_for_eb(int tower) const {
344  // for EB, two types of tower (LVRB top/bottom)
345  return (tower > 12 && tower < 21) || (tower > 28 && tower < 37) || (tower > 44 && tower < 53) ||
346  (tower > 60 && tower < 69);
347  }
348 
349  ALPAKA_FN_INLINE ALPAKA_FN_ACC uint32_t compute_ebdetid(ElectronicsIdGPU const& eid) const {
350  // as in Geometry/EcalMaping/.../EcalElectronicsMapping
351  auto const dcc = eid.dccId();
352  auto const tower = eid.towerId();
353  auto const strip = eid.stripId();
354  auto const xtal = eid.xtalId();
355 
356  int smid = 0;
357  int iphi = 0;
358  bool EBPlus = (zside_for_eb(eid) > 0);
359  bool EBMinus = !EBPlus;
360 
361  if (zside_for_eb(eid) < 0) {
362  smid = dcc + 19 - ElectronicsIdGPU::DCCID_PHI0_EBM;
363  iphi = (smid - 19) * ElectronicsIdGPU::kCrystalsInPhi;
364  iphi += 5 * ((tower - 1) % ElectronicsIdGPU::kTowersInPhi);
365  } else {
366  smid = dcc + 1 - ElectronicsIdGPU::DCCID_PHI0_EBP;
367  iphi = (smid - 1) * ElectronicsIdGPU::kCrystalsInPhi;
368  iphi += 5 * (ElectronicsIdGPU::kTowersInPhi - ((tower - 1) % ElectronicsIdGPU::kTowersInPhi) - 1);
369  }
370 
371  bool RightTower = right_tower_for_eb(tower);
372  int ieta = 5 * ((tower - 1) / ElectronicsIdGPU::kTowersInPhi) + 1;
373  if (RightTower) {
374  ieta += (strip - 1);
375  if (strip % 2 == 1) {
376  if (EBMinus)
377  iphi += (xtal - 1) + 1;
378  else
379  iphi += (4 - (xtal - 1)) + 1;
380  } else {
381  if (EBMinus)
382  iphi += (4 - (xtal - 1)) + 1;
383  else
384  iphi += (xtal - 1) + 1;
385  }
386  } else {
387  ieta += 4 - (strip - 1);
388  if (strip % 2 == 1) {
389  if (EBMinus)
390  iphi += (4 - (xtal - 1)) + 1;
391  else
392  iphi += (xtal - 1) + 1;
393  } else {
394  if (EBMinus)
395  iphi += (xtal - 1) + 1;
396  else
397  iphi += (4 - (xtal - 1)) + 1;
398  }
399  }
400 
401  if (zside_for_eb(eid) < 0)
402  ieta = -ieta;
403 
405  return did.rawId() | ((ieta > 0) ? (0x10000 | (ieta << 9)) : ((-ieta) << 9)) | (iphi & 0x1FF);
406  }
407 
408  ALPAKA_FN_INLINE ALPAKA_FN_ACC int adc(uint16_t sample) const { return sample & 0xfff; }
409 
410  ALPAKA_FN_INLINE ALPAKA_FN_ACC int gainId(uint16_t sample) const { return (sample >> 12) & 0x3; }
411  };
412 
413  void unpackRaw(Queue& queue,
414  InputDataHost const& inputHost,
415  EcalDigiDeviceCollection& digisDevEB,
416  EcalDigiDeviceCollection& digisDevEE,
418  uint32_t const nfedsWithData,
419  uint32_t const nbytesTotal) {
420  // input device buffers
421  ecal::raw::InputDataDevice inputDevice(queue, nbytesTotal, nfedsWithData);
422 
423  // transfer the raw data
424  alpaka::memcpy(queue, inputDevice.data, inputHost.data);
425  alpaka::memcpy(queue, inputDevice.offsets, inputHost.offsets);
426  alpaka::memcpy(queue, inputDevice.feds, inputHost.feds);
427 
428  auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(nfedsWithData, 32); // 32 channels per block
429  alpaka::exec<Acc1D>(queue,
430  workDiv,
431  Kernel_unpack{},
432  inputDevice.data.data(),
433  inputDevice.offsets.data(),
434  inputDevice.feds.data(),
435  digisDevEB.view(),
436  digisDevEE.view(),
437  mapping.const_view(),
438  nbytesTotal);
439  }
440 
441 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::raw
const dim3 threadIdx
Definition: cudaCompat.h:29
ALPAKA_FN_ACC constexpr bool once_per_block(TAcc const &acc)
common ppss p3p6s2 common epss epspn46 common const1 w2
Definition: inclppp.h:1
const dim3 gridDim
Definition: cudaCompat.h:33
cms::alpakatools::device_buffer< Device, uint32_t[]> offsets
T w() const
void unpackRaw(Queue &queue, InputDataHost const &inputHost, EcalDigiDeviceCollection &digisDevEB, EcalDigiDeviceCollection &digisDevEE, EcalElectronicsMappingDevice const &mapping, uint32_t const nfedsWithData, uint32_t const nbytesTotal)
ALPAKA_FN_ACC void operator()(TAcc const &acc, unsigned char const *__restrict__ data, uint32_t const *__restrict__ offsets, int const *__restrict__ feds, EcalDigiDeviceCollection::View digisDevEB, EcalDigiDeviceCollection::View digisDevEE, EcalElectronicsMappingDevice::ConstView eid2did, uint32_t const nbytesTotal) const
ALPAKA_FN_INLINE ALPAKA_FN_ACC uint8_t find_next_tower_block(uint64_t const *&current_tower_block, uint64_t const *trailer, uint32_t const bx, uint32_t const lv1) const
ALPAKA_FN_INLINE ALPAKA_FN_ACC uint32_t compute_ebdetid(ElectronicsIdGPU const &eid) const
ALPAKA_FN_INLINE ALPAKA_FN_ACC bool is_synced_towerblock(uint16_t const dccbx, uint16_t const bx, uint16_t const dccl1, uint16_t const l1) const
ALPAKA_FN_INLINE ALPAKA_FN_ACC bool is_barrel(uint8_t dccid) const
ALPAKA_FN_INLINE ALPAKA_FN_ACC bool right_tower_for_eb(int tower) const
cms::alpakatools::device_buffer< Device, int[]> feds
ALPAKA_FN_INLINE ALPAKA_FN_ACC int gainId(uint16_t sample) const
PortableCollection< EcalElectronicsMappingSoA > EcalElectronicsMappingDevice
cms::alpakatools::host_buffer< uint32_t[]> offsets
Definition: DetId.h:17
ALPAKA_FN_INLINE ALPAKA_FN_ACC uint8_t fed2dcc(int fed) const
constexpr int gainId(sample_type sample)
get the gainId (2 bits)
unsigned long long uint64_t
Definition: Time.h:13
cms::alpakatools::host_buffer< unsigned char[]> data
cms::alpakatools::host_buffer< int[]> feds
ALPAKA_FN_INLINE ALPAKA_FN_ACC int zside_for_eb(ElectronicsIdGPU const &eid) const
static constexpr unsigned int sampleSize
Definition: EcalConstants.h:53
PortableCollection< EcalDigiSoA > EcalDigiDeviceCollection
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
static unsigned int const shift
ALPAKA_FN_INLINE ALPAKA_FN_ACC int adc(uint16_t sample) const
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:85
ALPAKA_FN_INLINE ALPAKA_FN_ACC void print_first3bits(uint64_t const *buffer, uint32_t size) const
cms::alpakatools::device_buffer< Device, unsigned char[]> data
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
ALPAKA_FN_INLINE ALPAKA_FN_ACC void print_raw_buffer(uint8_t const *const buffer, uint32_t const nbytes, uint32_t const nbytes_per_row=20) const
uint16_t *__restrict__ uint16_t const *__restrict__ adc