CMS 3D CMS Logo

All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
LSTEvent.dev.cc
Go to the documentation of this file.
2 
3 #include "LSTEvent.h"
4 
5 #include "MiniDoublet.h"
6 #include "PixelQuintuplet.h"
7 #include "PixelTriplet.h"
8 #include "Quintuplet.h"
9 #include "Segment.h"
10 #include "TrackCandidate.h"
11 #include "Triplet.h"
12 
17 
18 using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst;
19 
21  alpaka::wait(queue_); // other calls can be asynchronous
22 
23  //reset the arrays
24  for (int i = 0; i < 6; i++) {
29  if (i < 5) {
34  }
35  }
36 }
37 
39  alpaka::wait(queue_); // synchronize to reset consistently
40  //reset the arrays
41  for (int i = 0; i < 6; i++) {
46  if (i < 5) {
51  }
52  }
53  hitsDC_.reset();
54  miniDoubletsDC_.reset();
55  rangesDC_.reset();
56  segmentsDC_.reset();
57  tripletsDC_.reset();
58  quintupletsDC_.reset();
59  trackCandidatesDC_.reset();
60  pixelTripletsDC_.reset();
61  pixelQuintupletsDC_.reset();
62 
63  hitsHC_.reset();
64  rangesHC_.reset();
65  miniDoubletsHC_.reset();
66  segmentsHC_.reset();
67  tripletsHC_.reset();
68  quintupletsHC_.reset();
69  pixelTripletsHC_.reset();
70  pixelQuintupletsHC_.reset();
71  trackCandidatesHC_.reset();
72  modulesHC_.reset();
73 }
74 
// Uploads the hits of one event to the device and launches the hit-preparation kernels.
//
//   x, y, z      hit coordinates, copied into the HitsSoA columns xs / ys / zs
//   detId        per-hit detector id, copied into the HitsSoA column detid
//   idxInNtuple  per-hit index, copied into the HitsSoA column idxs
//
// The hit count is x.size() and the same count is used as the extent of all five copies,
// so every input vector must hold at least that many entries (this is not checked here).
// On first use the hits and ranges device collections are allocated and filled with 0xff bytes.
75 void LSTEvent::addHitToEvent(std::vector<float> const& x,
76  std::vector<float> const& y,
77  std::vector<float> const& z,
78  std::vector<unsigned int> const& detId,
79  std::vector<unsigned int> const& idxInNtuple) {
80  // Use the actual number of hits instead of a max.
81  unsigned int nHits = x.size();
82 
83  // Initialize space on device/host for next event.
  // The hits collection is sized by {nHits, nModules_}; it is only created when not already present.
84  if (!hitsDC_) {
85  std::array<int, 2> const hits_sizes{{static_cast<int>(nHits), static_cast<int>(nModules_)}};
86  hitsDC_.emplace(hits_sizes, queue_);
87 
  // Wrap each of the five hit-range columns in a device view so it can be memset below.
88  auto hitsRanges = hitsDC_->view<HitsRangesSoA>();
89  auto hitRanges_view =
90  cms::alpakatools::make_device_view(queue_, hitsRanges.hitRanges(), hitsRanges.metadata().size());
91  auto hitRangesLower_view =
92  cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesLower(), hitsRanges.metadata().size());
93  auto hitRangesUpper_view =
94  cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesUpper(), hitsRanges.metadata().size());
95  auto hitRangesnLower_view =
96  cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesnLower(), hitsRanges.metadata().size());
97  auto hitRangesnUpper_view =
98  cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesnUpper(), hitsRanges.metadata().size());
  // Every byte of the range columns is set to 0xff.
  // NOTE(review): presumably this acts as a "not set" marker for the kernels - confirm.
99  alpaka::memset(queue_, hitRanges_view, 0xff);
100  alpaka::memset(queue_, hitRangesLower_view, 0xff);
101  alpaka::memset(queue_, hitRangesUpper_view, 0xff);
102  alpaka::memset(queue_, hitRangesnLower_view, 0xff);
103  alpaka::memset(queue_, hitRangesnUpper_view, 0xff);
104  }
105 
  // The ranges collection has nLowerModules_ + 1 entries and its whole buffer is set to 0xff bytes.
106  if (!rangesDC_) {
107  rangesDC_.emplace(nLowerModules_ + 1, queue_);
108  auto buf = rangesDC_->buffer();
109  alpaka::memset(queue_, buf, 0xff);
110  }
111 
112  // Copy the host arrays to the GPU.
  // The device views span the full column size; only the first nHits elements are copied.
113  auto hits = hitsDC_->view<HitsSoA>();
114  auto xs_d = cms::alpakatools::make_device_view(queue_, hits.xs(), (Idx)hits.metadata().size());
115  auto ys_d = cms::alpakatools::make_device_view(queue_, hits.ys(), (Idx)hits.metadata().size());
116  auto zs_d = cms::alpakatools::make_device_view(queue_, hits.zs(), (Idx)hits.metadata().size());
117  auto detId_d = cms::alpakatools::make_device_view(queue_, hits.detid(), (Idx)hits.metadata().size());
118  auto idxs_d = cms::alpakatools::make_device_view(queue_, hits.idxs(), (Idx)hits.metadata().size());
119  alpaka::memcpy(queue_, xs_d, x, (Idx)nHits);
120  alpaka::memcpy(queue_, ys_d, y, (Idx)nHits);
121  alpaka::memcpy(queue_, zs_d, z, (Idx)nHits);
122  alpaka::memcpy(queue_, detId_d, detId, (Idx)nHits);
123  alpaka::memcpy(queue_, idxs_d, idxInNtuple, (Idx)nHits);
124  alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory
125 
  // First launch: HitLoopKernel over the hits, with 256 threads per block and max_blocks blocks.
126  Vec3D const threadsPerBlock1{1, 1, 256};
127  Vec3D const blocksPerGrid1{1, 1, max_blocks};
128  WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread);
129 
130  alpaka::exec<Acc3D>(queue_,
131  hit_loop_workdiv,
132  HitLoopKernel{},
133  Endcap,
134  TwoS,
135  nModules_,
136  nEndCapMap_,
137  endcapGeometry_.const_view(),
138  modules_.const_view<ModulesSoA>(),
139  hitsDC_->view<HitsSoA>(),
140  hitsDC_->view<HitsRangesSoA>(),
141  nHits);
142 
  // Second launch, using the same block and thread shape as the first.
  // NOTE(review): listing lines 149 and 152 are absent from this extract, so the kernel object
  // and the trailing argument(s) of this launch are not visible here.
143  Vec3D const threadsPerBlock2{1, 1, 256};
144  Vec3D const blocksPerGrid2{1, 1, max_blocks};
145  WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread);
146 
147  alpaka::exec<Acc3D>(queue_,
148  module_ranges_workdiv,
150  modules_.const_view<ModulesSoA>(),
151  hitsDC_->view<HitsRangesSoA>(),
153 }
154 
// Uploads the pixel segments of one event and records their counts for the pixel module.
//
//   hitIndices0..3  per-segment hit indices, staged in temporary device buffers for the final kernel
//   dPhiChange      per-segment values, staged in a temporary device buffer for the final kernel
//   ptIn, ptErr, px, py, pz, eta, etaErr, phi, charge, seedIdx, superbin, pixelType, isQuad
//                   per-segment values copied into the same-named SegmentsPixelSoA columns
//
// The segment count is ptIn.size() and is used as the extent of every copy, so all input vectors
// must hold at least that many entries (this is not checked here). The mini-doublet and segment
// device collections are allocated on first use. The segment count, and twice that count for
// mini-doublets, are written to the occupancy entries at index pixelMapping_.pixelModuleIndex.
155 void LSTEvent::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
156  std::vector<unsigned int> const& hitIndices1,
157  std::vector<unsigned int> const& hitIndices2,
158  std::vector<unsigned int> const& hitIndices3,
159  std::vector<float> const& dPhiChange,
160  std::vector<float> const& ptIn,
161  std::vector<float> const& ptErr,
162  std::vector<float> const& px,
163  std::vector<float> const& py,
164  std::vector<float> const& pz,
165  std::vector<float> const& eta,
166  std::vector<float> const& etaErr,
167  std::vector<float> const& phi,
168  std::vector<int> const& charge,
169  std::vector<unsigned int> const& seedIdx,
170  std::vector<int> const& superbin,
171  std::vector<PixelType> const& pixelType,
172  std::vector<char> const& isQuad) {
173  unsigned int size = ptIn.size();
174 
  // Warn when more pixel segments were supplied than the per-module maximum.
  // NOTE(review): listing line 181 is absent from this extract; presumably it clamps `size`
  // to n_max_pixel_segments_per_module, as the warning text implies - confirm against the real file.
175  if (size > n_max_pixel_segments_per_module) {
176  printf(
177  "*********************************************************\n"
178  "* Warning: Pixel line segments will be truncated. *\n"
179  "* You need to increase n_max_pixel_segments_per_module. *\n"
180  "*********************************************************\n");
182  }
183 
  // Two mini-doublets are accounted for per pixel segment.
184  unsigned int mdSize = 2 * size;
185  uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex;
186 
  // First-use allocation of the mini-doublet collection.
187  if (!miniDoubletsDC_) {
188  // Create a view for the element pixelModuleIndex inside rangesOccupancy.miniDoubletModuleOccupancy()
189  auto rangesOccupancy = rangesDC_->view();
190  auto dst_view_miniDoubletModuleOccupancy =
191  cms::alpakatools::make_device_view(queue_, rangesOccupancy.miniDoubletModuleOccupancy()[pixelModuleIndex]);
192 
193  // Create a host buffer for a value to be passed to the device
194  auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer<int>(queue_);
195  *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules;
196 
197  alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h);
198 
199  WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
200 
  // NOTE(review): listing line 203 (the kernel object of this launch) is absent from this extract.
201  alpaka::exec<Acc1D>(queue_,
202  createMDArrayRangesGPU_workDiv,
204  modules_.const_view<ModulesSoA>(),
205  rangesDC_->view());
206 
  // Read the total mini-doublet count back from the device ranges.
207  auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
208  auto nTotalMDs_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalMDs());
209  alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d);
210  alpaka::wait(queue_); // wait to get the data before manipulation
211 
  // Add n_max_pixel_md_per_modules to the count read back before sizing the collection.
212  *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules;
213  unsigned int nTotalMDs = *nTotalMDs_buf_h.data();
214 
215  std::array<int, 2> const mds_sizes{{static_cast<int>(nTotalMDs), static_cast<int>(nLowerModules_ + 1)}};
216  miniDoubletsDC_.emplace(mds_sizes, queue_);
217 
  // Zero the per-module mini-doublet counters of the freshly allocated collection.
218  auto mdsOccupancy = miniDoubletsDC_->view<MiniDoubletsOccupancySoA>();
219  auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size());
220  auto totOccupancyMDs_view =
221  cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size());
222  alpaka::memset(queue_, nMDs_view, 0u);
223  alpaka::memset(queue_, totOccupancyMDs_view, 0u);
224  }
  // First-use allocation of the segment collection.
225  if (!segmentsDC_) {
226  // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously.
227  // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them
228 
229  WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
230 
  // NOTE(review): listing line 233 (the kernel object of this launch) is absent from this extract.
231  alpaka::exec<Acc1D>(queue_,
232  createSegmentArrayRanges_workDiv,
234  modules_.const_view<ModulesSoA>(),
235  rangesDC_->view(),
236  miniDoubletsDC_->const_view<MiniDoubletsSoA>());
237 
  // Read the total segment count from the device ranges into the member nTotalSegments_.
238  auto rangesOccupancy = rangesDC_->view();
239  auto nTotalSegments_view_h = cms::alpakatools::make_host_view(nTotalSegments_);
240  auto nTotalSegments_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalSegs());
241  alpaka::memcpy(queue_, nTotalSegments_view_h, nTotalSegments_view_d);
242  alpaka::wait(queue_); // wait to get the value before manipulation
243 
  // NOTE(review): listing line 244 is absent from this extract; the "manipulation" of
  // nTotalSegments_ announced by the comment above is therefore not visible here.
245 
246  std::array<int, 3> const segments_sizes{{static_cast<int>(nTotalSegments_),
247  static_cast<int>(nLowerModules_ + 1),
248  static_cast<int>(n_max_pixel_segments_per_module)}};
249  segmentsDC_.emplace(segments_sizes, queue_);
250 
  // Zero the per-module segment counters of the freshly allocated collection.
251  auto segmentsOccupancy = segmentsDC_->view<SegmentsOccupancySoA>();
252  auto nSegments_view =
253  cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size());
254  auto totOccupancySegments_view = cms::alpakatools::make_device_view(
255  queue_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size());
256  alpaka::memset(queue_, nSegments_view, 0u);
257  alpaka::memset(queue_, totOccupancySegments_view, 0u);
258  }
259 
  // Temporary device buffers for the inputs that are handed to the final kernel by pointer.
260  auto hitIndices0_dev = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, size);
261  auto hitIndices1_dev = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, size);
262  auto hitIndices2_dev = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, size);
263  auto hitIndices3_dev = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, size);
264  auto dPhiChange_dev = cms::alpakatools::make_device_buffer<float[]>(queue_, size);
265 
266  alpaka::memcpy(queue_, hitIndices0_dev, hitIndices0, size);
267  alpaka::memcpy(queue_, hitIndices1_dev, hitIndices1, size);
268  alpaka::memcpy(queue_, hitIndices2_dev, hitIndices2, size);
269  alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size);
270  alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size);
271 
  // Copy the remaining inputs straight into the first `size` elements of the pixel-segment columns.
272  SegmentsPixel segmentsPixel = segmentsDC_->view<SegmentsPixelSoA>();
273  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.ptIn(), size), ptIn, size);
274  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.ptErr(), size), ptErr, size);
275  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.px(), size), px, size);
276  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.py(), size), py, size);
277  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.pz(), size), pz, size);
278  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.etaErr(), size), etaErr, size);
279  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.isQuad(), size), isQuad, size);
280  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.eta(), size), eta, size);
281  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.phi(), size), phi, size);
282  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.charge(), size), charge, size);
283  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.seedIdx(), size), seedIdx, size);
284  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), size), superbin, size);
285  alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), size), pixelType, size);
286 
287  // Create source views for size and mdSize
288  auto src_view_size = cms::alpakatools::make_host_view(size);
289  auto src_view_mdSize = cms::alpakatools::make_host_view(mdSize);
290 
  // Write `size` into both segment counters of the pixel module.
291  auto segmentsOccupancy = segmentsDC_->view<SegmentsOccupancySoA>();
292  auto dst_view_segments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[pixelModuleIndex]);
293  alpaka::memcpy(queue_, dst_view_segments, src_view_size);
294 
295  auto dst_view_totOccupancySegments =
296  cms::alpakatools::make_device_view(queue_, segmentsOccupancy.totOccupancySegments()[pixelModuleIndex]);
297  alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size);
298 
  // Write `mdSize` into both mini-doublet counters of the pixel module.
299  auto mdsOccupancy = miniDoubletsDC_->view<MiniDoubletsOccupancySoA>();
300  auto dst_view_nMDs = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs()[pixelModuleIndex]);
301  alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize);
302 
303  auto dst_view_totOccupancyMDs =
304  cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs()[pixelModuleIndex]);
305  alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize);
306 
307  alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory
308 
309  Vec3D const threadsPerBlock{1, 1, 256};
310  Vec3D const blocksPerGrid{1, 1, max_blocks};
311  WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread);
312 
  // Final launch, consuming the staged hit indices and dPhiChange values.
  // NOTE(review): listing lines 315 (kernel object) and 319 (one argument) are absent from this extract.
313  alpaka::exec<Acc3D>(queue_,
314  addPixelSegmentToEvent_workdiv,
316  modules_.const_view<ModulesSoA>(),
317  rangesDC_->const_view(),
318  hitsDC_->view<HitsSoA>(),
320  segmentsDC_->view<SegmentsSoA>(),
321  segmentsDC_->view<SegmentsPixelSoA>(),
322  hitIndices0_dev.data(),
323  hitIndices1_dev.data(),
324  hitIndices2_dev.data(),
325  hitIndices3_dev.data(),
326  dPhiChange_dev.data(),
327  pixelModuleIndex,
328  size);
329 }
330 
332  // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy
333  auto rangesOccupancy = rangesDC_->view();
334  auto dst_view_miniDoubletModuleOccupancy =
335  cms::alpakatools::make_device_view(queue_, rangesOccupancy.miniDoubletModuleOccupancy()[nLowerModules_]);
336 
337  // Create a host buffer for a value to be passed to the device
338  auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer<int>(queue_);
339  *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules;
340 
341  alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h);
342 
343  WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
344 
345  alpaka::exec<Acc1D>(queue_,
346  createMDArrayRangesGPU_workDiv,
348  modules_.const_view<ModulesSoA>(),
349  rangesDC_->view());
350 
351  auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
352  auto nTotalMDs_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalMDs());
353  alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d);
354  alpaka::wait(queue_); // wait to get the data before manipulation
355 
356  *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules;
357  unsigned int nTotalMDs = *nTotalMDs_buf_h.data();
358 
359  if (!miniDoubletsDC_) {
360  std::array<int, 2> const mds_sizes{{static_cast<int>(nTotalMDs), static_cast<int>(nLowerModules_ + 1)}};
361  miniDoubletsDC_.emplace(mds_sizes, queue_);
362 
363  auto mdsOccupancy = miniDoubletsDC_->view<MiniDoubletsOccupancySoA>();
364  auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size());
365  auto totOccupancyMDs_view =
366  cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size());
367  alpaka::memset(queue_, nMDs_view, 0u);
368  alpaka::memset(queue_, totOccupancyMDs_view, 0u);
369  }
370 
371  Vec3D const threadsPerBlockCreateMD{1, 16, 32};
372  Vec3D const blocksPerGridCreateMD{1, nLowerModules_ / threadsPerBlockCreateMD[1], 1};
373  WorkDiv3D const createMiniDoublets_workDiv =
374  createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread);
375 
376  alpaka::exec<Acc3D>(queue_,
377  createMiniDoublets_workDiv,
379  modules_.const_view<ModulesSoA>(),
380  hitsDC_->const_view<HitsSoA>(),
381  hitsDC_->const_view<HitsRangesSoA>(),
384  rangesDC_->const_view());
385 
386  WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
387 
388  alpaka::exec<Acc1D>(queue_,
389  addMiniDoubletRangesToEventExplicit_workDiv,
391  modules_.const_view<ModulesSoA>(),
393  rangesDC_->view(),
394  hitsDC_->const_view<HitsRangesSoA>());
395 
396  if (addObjects_) {
398  }
399 }
400 
402  if (!segmentsDC_) {
403  std::array<int, 3> const segments_sizes{{static_cast<int>(nTotalSegments_),
404  static_cast<int>(nLowerModules_ + 1),
405  static_cast<int>(n_max_pixel_segments_per_module)}};
406  segmentsDC_.emplace(segments_sizes, queue_);
407 
408  auto segmentsOccupancy = segmentsDC_->view<SegmentsOccupancySoA>();
409  auto nSegments_view =
410  cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size());
411  auto totOccupancySegments_view = cms::alpakatools::make_device_view(
412  queue_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size());
413  alpaka::memset(queue_, nSegments_view, 0u);
414  alpaka::memset(queue_, totOccupancySegments_view, 0u);
415  }
416 
417  Vec3D const threadsPerBlockCreateSeg{1, 1, 64};
418  Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_};
419  WorkDiv3D const createSegments_workDiv =
420  createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread);
421 
422  alpaka::exec<Acc3D>(queue_,
423  createSegments_workDiv,
424  CreateSegments{},
425  modules_.const_view<ModulesSoA>(),
426  miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
428  segmentsDC_->view<SegmentsSoA>(),
430  rangesDC_->const_view());
431 
432  WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
433 
434  alpaka::exec<Acc1D>(queue_,
435  addSegmentRangesToEventExplicit_workDiv,
437  modules_.const_view<ModulesSoA>(),
439  rangesDC_->view());
440 
441  if (addObjects_) {
443  }
444 }
445 
447  if (!tripletsDC_) {
448  WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
449 
450  alpaka::exec<Acc1D>(queue_,
451  createTripletArrayRanges_workDiv,
453  modules_.const_view<ModulesSoA>(),
454  rangesDC_->view(),
455  segmentsDC_->const_view<SegmentsOccupancySoA>());
456 
457  // TODO: Why are we pulling this back down only to put it back on the device in a new struct?
458  auto rangesOccupancy = rangesDC_->view();
459  auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
460  auto maxTriplets_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalTrips());
461  alpaka::memcpy(queue_, maxTriplets_buf_h, maxTriplets_buf_d);
462  alpaka::wait(queue_); // wait to get the value before using it
463 
464  std::array<int, 2> const triplets_sizes{
465  {static_cast<int>(*maxTriplets_buf_h.data()), static_cast<int>(nLowerModules_)}};
466  tripletsDC_.emplace(triplets_sizes, queue_);
467 
468  auto tripletsOccupancy = tripletsDC_->view<TripletsOccupancySoA>();
469  auto nTriplets_view =
470  cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), tripletsOccupancy.metadata().size());
471  alpaka::memset(queue_, nTriplets_view, 0u);
472  auto totOccupancyTriplets_view = cms::alpakatools::make_device_view(
473  queue_, tripletsOccupancy.totOccupancyTriplets(), tripletsOccupancy.metadata().size());
474  alpaka::memset(queue_, totOccupancyTriplets_view, 0u);
475  auto triplets = tripletsDC_->view<TripletsSoA>();
476  auto partOfPT5_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT5(), triplets.metadata().size());
477  alpaka::memset(queue_, partOfPT5_view, 0u);
478  auto partOfT5_view = cms::alpakatools::make_device_view(queue_, triplets.partOfT5(), triplets.metadata().size());
479  alpaka::memset(queue_, partOfT5_view, 0u);
480  auto partOfPT3_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT3(), triplets.metadata().size());
481  alpaka::memset(queue_, partOfPT3_view, 0u);
482  }
483 
484  uint16_t nonZeroModules = 0;
485  unsigned int max_InnerSeg = 0;
486 
487  // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs)
488  auto nSegments_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nLowerModules_);
489  auto nSegments_buf_d = cms::alpakatools::make_device_view(
490  queue_, segmentsDC_->const_view<SegmentsOccupancySoA>().nSegments(), nLowerModules_);
491  alpaka::memcpy(queue_, nSegments_buf_h, nSegments_buf_d, nLowerModules_);
492 
493  // ... same for module_nConnectedModules
494  // FIXME: replace by ES host data
495  auto modules = modules_.const_view<ModulesSoA>();
496  auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer<uint16_t[]>(queue_, nLowerModules_);
497  auto module_nConnectedModules_buf_d =
498  cms::alpakatools::make_device_view(queue_, modules.nConnectedModules(), nLowerModules_); // only lower modules
499  alpaka::memcpy(queue_, module_nConnectedModules_buf_h, module_nConnectedModules_buf_d, nLowerModules_);
500 
501  alpaka::wait(queue_); // wait for nSegments and module_nConnectedModules before using
502 
503  auto const* nSegments = nSegments_buf_h.data();
504  auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data();
505 
506  // Allocate host index and fill it directly
507  auto index_buf_h = cms::alpakatools::make_host_buffer<uint16_t[]>(queue_, nLowerModules_);
508  auto* index = index_buf_h.data();
509 
510  for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) {
511  uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex];
512  unsigned int nInnerSegments = nSegments[innerLowerModuleIndex];
513  if (nConnectedModules != 0 and nInnerSegments != 0) {
514  index[nonZeroModules] = innerLowerModuleIndex;
515  nonZeroModules++;
516  }
517  max_InnerSeg = std::max(max_InnerSeg, nInnerSegments);
518  }
519 
520  // Allocate and copy to device index
521  auto index_gpu_buf = cms::alpakatools::make_device_buffer<uint16_t[]>(queue_, nLowerModules_);
522  alpaka::memcpy(queue_, index_gpu_buf, index_buf_h, nonZeroModules);
523 
524  Vec3D const threadsPerBlockCreateTrip{1, 16, 16};
525  Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1};
526  WorkDiv3D const createTriplets_workDiv =
527  createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread);
528 
529  alpaka::exec<Acc3D>(queue_,
530  createTriplets_workDiv,
531  CreateTriplets{},
532  modules_.const_view<ModulesSoA>(),
533  miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
534  segmentsDC_->const_view<SegmentsSoA>(),
535  segmentsDC_->const_view<SegmentsOccupancySoA>(),
536  tripletsDC_->view<TripletsSoA>(),
538  rangesDC_->const_view(),
539  index_gpu_buf.data(),
540  nonZeroModules);
541 
542  WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
543 
544  alpaka::exec<Acc1D>(queue_,
545  addTripletRangesToEventExplicit_workDiv,
547  modules_.const_view<ModulesSoA>(),
548  tripletsDC_->const_view<TripletsOccupancySoA>(),
549  rangesDC_->view());
550 
551  if (addObjects_) {
553  }
554 }
555 
// Enqueues the track-candidate building kernels and warns when a candidate limit was reached.
//
//   no_pls_dupclean  when false, the CheckHitspLS kernel is launched before the pLS cross-cleaning
//   tc_pls_triplets  forwarded as the last argument of the launch configured by
//                    addpLSasTrackCandidate_workDiv
//
// The function waits on the queue twice: once to read nEligibleT5Modules, which sizes one of the
// launches, and once at the end to read the four track-candidate counters back to the host.
556 void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) {
  // First-use setup of the track-candidate collection; its whole buffer is zeroed.
  // NOTE(review): listing line 558 is absent from this extract; presumably it emplaces
  // trackCandidatesDC_ before the buffer is taken - confirm against the real file.
557  if (!trackCandidatesDC_) {
559  auto buf = trackCandidatesDC_->buffer();
560  alpaka::memset(queue_, buf, 0u);
561  }
562 
563  Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64};
564  Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20};
565  WorkDiv3D const crossCleanpT3_workDiv =
566  createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread);
567 
568  alpaka::exec<Acc3D>(queue_,
569  crossCleanpT3_workDiv,
570  CrossCleanpT3{},
571  modules_.const_view<ModulesSoA>(),
572  rangesDC_->const_view(),
573  pixelTripletsDC_->view(),
574  segmentsDC_->const_view<SegmentsPixelSoA>(),
575  pixelQuintupletsDC_->const_view());
576 
577  WorkDiv1D const addpT3asTrackCandidates_workDiv = createWorkDiv<Vec1D>({1}, {512}, {1});
578 
  // NOTE(review): listing lines 581-582 (kernel object and first argument) are absent from this extract.
579  alpaka::exec<Acc1D>(queue_,
580  addpT3asTrackCandidates_workDiv,
583  pixelTripletsDC_->const_view(),
584  trackCandidatesDC_->view(),
585  segmentsDC_->const_view<SegmentsPixelSoA>(),
586  rangesDC_->const_view());
587 
588  // Pull nEligibleT5Modules from the device.
589  auto rangesOccupancy = rangesDC_->view();
590  auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer<uint16_t>(queue_);
591  auto nEligibleModules_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nEligibleT5Modules());
592  alpaka::memcpy(queue_, nEligibleModules_buf_h, nEligibleModules_buf_d);
593  alpaka::wait(queue_); // wait to get the value before using
594  auto const nEligibleModules = *nEligibleModules_buf_h.data();
595 
  // The grid is scaled with the number of eligible modules, with at least one block per dimension.
596  Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32};
597  Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)};
598  WorkDiv3D const removeDupQuintupletsBeforeTC_workDiv =
599  createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread);
600 
  // NOTE(review): listing lines 603-605 (kernel object and leading arguments) are absent from this extract.
601  alpaka::exec<Acc3D>(queue_,
602  removeDupQuintupletsBeforeTC_workDiv,
606  rangesDC_->const_view());
607 
  // NOTE(review): the literal 13296 in the grid size is unexplained here; presumably a module count - confirm.
608  Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32};
609  Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks};
610  WorkDiv3D const crossCleanT5_workDiv =
611  createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread);
612 
  // NOTE(review): listing lines 617-618 (two arguments) are absent from this extract.
613  alpaka::exec<Acc3D>(queue_,
614  crossCleanT5_workDiv,
615  CrossCleanT5{},
616  modules_.const_view<ModulesSoA>(),
619  pixelQuintupletsDC_->const_view(),
620  pixelTripletsDC_->const_view(),
621  rangesDC_->const_view());
622 
623  Vec3D const threadsPerBlock_addT5asTrackCandidate{1, 8, 128};
624  Vec3D const blocksPerGrid_addT5asTrackCandidate{1, 8, 10};
625  WorkDiv3D const addT5asTrackCandidate_workDiv =
626  createWorkDiv(blocksPerGrid_addT5asTrackCandidate, threadsPerBlock_addT5asTrackCandidate, elementsPerThread);
627 
  // NOTE(review): listing lines 630-631 and 633 (kernel object and some arguments) are absent from this extract.
628  alpaka::exec<Acc3D>(queue_,
629  addT5asTrackCandidate_workDiv,
632  quintupletsDC_->const_view<QuintupletsSoA>(),
634  trackCandidatesDC_->view(),
635  rangesDC_->const_view());
636 
  // Optional step, skipped when the caller sets no_pls_dupclean.
637  if (!no_pls_dupclean) {
638  Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16};
639  Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4};
640  WorkDiv3D const checkHitspLS_workDiv =
641  createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread);
642 
643  alpaka::exec<Acc3D>(queue_,
644  checkHitspLS_workDiv,
645  CheckHitspLS{},
646  modules_.const_view<ModulesSoA>(),
647  segmentsDC_->const_view<SegmentsOccupancySoA>(),
648  segmentsDC_->view<SegmentsPixelSoA>(),
649  true);
650  }
651 
652  Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32};
653  Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20};
654  WorkDiv3D const crossCleanpLS_workDiv =
655  createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread);
656 
657  alpaka::exec<Acc3D>(queue_,
658  crossCleanpLS_workDiv,
659  CrossCleanpLS{},
660  modules_.const_view<ModulesSoA>(),
661  rangesDC_->const_view(),
662  pixelTripletsDC_->const_view(),
663  trackCandidatesDC_->view(),
664  segmentsDC_->const_view<SegmentsSoA>(),
665  segmentsDC_->const_view<SegmentsOccupancySoA>(),
666  segmentsDC_->view<SegmentsPixelSoA>(),
667  miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
668  hitsDC_->const_view<HitsSoA>(),
669  quintupletsDC_->const_view<QuintupletsSoA>());
670 
671  Vec3D const threadsPerBlock_addpLSasTrackCandidate{1, 1, 384};
672  Vec3D const blocksPerGrid_addpLSasTrackCandidate{1, 1, max_blocks};
673  WorkDiv3D const addpLSasTrackCandidate_workDiv =
674  createWorkDiv(blocksPerGrid_addpLSasTrackCandidate, threadsPerBlock_addpLSasTrackCandidate, elementsPerThread);
675 
  // NOTE(review): listing lines 678-679 (kernel object and first argument) are absent from this extract.
676  alpaka::exec<Acc3D>(queue_,
677  addpLSasTrackCandidate_workDiv,
680  trackCandidatesDC_->view(),
681  segmentsDC_->const_view<SegmentsOccupancySoA>(),
682  segmentsDC_->const_view<SegmentsPixelSoA>(),
683  tc_pls_triplets);
684 
685  // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached
  // Each of the four counters is copied from the device collection into its own one-element host buffer.
686  auto nTrackCanpT5Host_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
687  auto nTrackCanpT3Host_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
688  auto nTrackCanpLSHost_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
689  auto nTrackCanT5Host_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
690  alpaka::memcpy(queue_,
691  nTrackCanpT5Host_buf,
692  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT5()));
693  alpaka::memcpy(queue_,
694  nTrackCanpT3Host_buf,
695  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT3()));
696  alpaka::memcpy(queue_,
697  nTrackCanpLSHost_buf,
698  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespLS()));
699  alpaka::memcpy(queue_,
700  nTrackCanT5Host_buf,
701  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5()));
702  alpaka::wait(queue_); // wait to get the values before using them
703 
  // The warning fires only on exact equality with a limit: the pT5 + pT3 + pLS sum against the
  // pixel limit, or the T5 count against the non-pixel limit.
704  auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data();
705  auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data();
706  auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data();
707  auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data();
708  if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) ||
709  (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) {
710  printf(
711  "****************************************************************************************************\n"
712  "* Warning: Track candidates were possibly truncated. *\n"
713  "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n"
714  "* Run the code with the WARNINGS flag activated for more details. *\n"
715  "****************************************************************************************************\n");
716  }
717 }
718 
// Body of the pixel-triplet building routine (presumably LSTEvent::createPixelTriplets; the signature
// line and a few interior lines are absent from this listing -- confirm against the original file).
// Visible steps: zero the pixel-triplet counters, stage per-pLS superbin/pixel-type data on the host,
// translate them into per-pLS connected-module (size, index) pairs, copy those to the device, launch
// the triplet-building kernel and finally the duplicate-removal kernel.
720  if (!pixelTripletsDC_) {
     // (listing line 721 is missing here.) The two counters below are zeroed on the device through queue_.
722  auto nPixelTriplets_view = cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets());
723  alpaka::memset(queue_, nPixelTriplets_view, 0u);
724  auto totOccupancyPixelTriplets_view =
725  cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->totOccupancyPixelTriplets());
726  alpaka::memset(queue_, totOccupancyPixelTriplets_view, 0u);
727  }
     // Views onto the occupancy and pixel columns of the segment collection.
728  SegmentsOccupancy segmentsOccupancy = segmentsDC_->view<SegmentsOccupancySoA>();
729  SegmentsPixelConst segmentsPixel = segmentsDC_->view<SegmentsPixelSoA>();
730 
     // Host staging buffers, sized by n_max_pixel_segments_per_module.
731  auto superbins_buf = cms::alpakatools::make_host_buffer<int[]>(queue_, n_max_pixel_segments_per_module);
732  auto pixelTypes_buf = cms::alpakatools::make_host_buffer<PixelType[]>(queue_, n_max_pixel_segments_per_module);
733 
     // Copies into the staging buffers; the source arguments (listing lines 736 and 740) are missing from this view.
734  alpaka::memcpy(queue_,
735  superbins_buf,
737  alpaka::memcpy(
738  queue_,
739  pixelTypes_buf,
741  auto const* superbins = superbins_buf.data();
742  auto const* pixelTypes = pixelTypes_buf.data();
743 
     // Left uninitialized on purpose: it is filled by the copy below and only read after the wait.
744  unsigned int nInnerSegments;
745  auto nInnerSegments_src_view = cms::alpakatools::make_host_view(nInnerSegments);
746 
747  // Create a sub-view for the device buffer
748  auto dev_view_nSegments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[nLowerModules_]);
749 
750  alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments);
751  alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using
752 
     // Per-pLS connected-module count and start index: filled on the host, then copied to the device.
753  auto connectedPixelSize_host_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nInnerSegments);
754  auto connectedPixelIndex_host_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nInnerSegments);
755  auto connectedPixelSize_dev_buf = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, nInnerSegments);
756  auto connectedPixelIndex_dev_buf = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, nInnerSegments);
757 
758  unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data();
759  unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data();
760 
     // Offsets added to the Pos / Neg map indices below. Each is the (index + size) of the last superbin
     // of the preceding map; the Neg offset additionally includes the Pos offset.
761  int pixelIndexOffsetPos =
762  pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1];
763  int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] +
764  pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos;
765 
766  // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel
767  // the current selection still leaves a significant fraction of unmatchable pLSs
768  for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS
769  PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS
770  int superbin = superbins[i]; // Get superbin for this pixel
     // A pLS with an out-of-range superbin, or a pixel type other than the three handled below,
     // gets size 0 and index 0.
771  if ((superbin < 0) or (superbin >= (int)size_superbins) or
772  ((pixelType != PixelType::kHighPt) and (pixelType != PixelType::kLowPtPosCurv) and
773  (pixelType != PixelType::kLowPtNegCurv))) {
774  connectedPixelSize_host[i] = 0;
775  connectedPixelIndex_host[i] = 0;
776  continue;
777  }
778 
779  // Used pixel type to select correct size-index arrays
780  switch (pixelType) {
781  case PixelType::kInvalid:
782  break;
783  case PixelType::kHighPt:
784  // number of connected modules to this pixel
785  connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin];
786  // index to get start of connected modules for this superbin in map
787  connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin];
788  break;
     // (case label on listing line 789 is missing from this view; the branch reads the *Pos arrays.)
790  // number of connected modules to this pixel
791  connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin];
792  // index to get start of connected modules for this superbin in map
793  connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos;
794  break;
     // (case label on listing line 795 is missing from this view; the branch reads the *Neg arrays.)
796  // number of connected modules to this pixel
797  connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin];
798  // index to get start of connected modules for this superbin in map
799  connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg;
800  break;
801  }
802  }
803 
     // Enqueue the host-to-device copies of the two per-pLS arrays (nInnerSegments elements each).
804  alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments);
805  alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments);
806 
807  Vec3D const threadsPerBlock{1, 4, 32};
808  Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1};
809  WorkDiv3D const createPixelTripletsFromMap_workDiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread);
810 
     // Kernel launch; the kernel functor argument (listing line 813) is missing from this view.
811  alpaka::exec<Acc3D>(queue_,
812  createPixelTripletsFromMap_workDiv,
814  modules_.const_view<ModulesSoA>(),
815  modules_.const_view<ModulesPixelSoA>(),
816  rangesDC_->const_view(),
817  miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
818  segmentsDC_->const_view<SegmentsSoA>(),
819  segmentsDC_->const_view<SegmentsPixelSoA>(),
820  tripletsDC_->view<TripletsSoA>(),
821  tripletsDC_->const_view<TripletsOccupancySoA>(),
822  pixelTripletsDC_->view(),
823  connectedPixelSize_dev_buf.data(),
824  connectedPixelIndex_dev_buf.data(),
825  nInnerSegments);
826 
     // Diagnostic only: copies the pixel-triplet counter back to the host and prints it.
827 #ifdef WARNINGS
828  auto nPixelTriplets_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
829 
830  alpaka::memcpy(
831  queue_, nPixelTriplets_buf, cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets()));
832  alpaka::wait(queue_); // wait to get the value before using it
833 
834  std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl;
835 #endif
836 
837  //pT3s can be cleaned here because they're not used in making pT5s!
838  Vec3D const threadsPerBlockDupPixTrip{1, 16, 16};
839  //seems like more blocks lead to conflicting writes
840  Vec3D const blocksPerGridDupPixTrip{1, 40, 1};
841  WorkDiv3D const removeDupPixelTripletsFromMap_workDiv =
842  createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread);
843 
844  alpaka::exec<Acc3D>(
845  queue_, removeDupPixelTripletsFromMap_workDiv, RemoveDupPixelTripletsFromMap{}, pixelTripletsDC_->view());
846 }
847 
// Body of the quintuplet building routine (presumably LSTEvent::createQuintuplets; the signature line and
// several kernel-argument lines are absent from this listing -- confirm against the original file).
// Visible steps: launch the eligible-module listing kernel, read back nEligibleT5Modules and nTotalQuints,
// allocate and zero the quintuplet collection if it does not exist yet, then launch three further kernels
// (build, duplicate removal, range bookkeeping).
849  WorkDiv1D const createEligibleModulesListForQuintuplets_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
850 
     // Kernel launch; the kernel functor argument (listing line 853) is missing from this view.
851  alpaka::exec<Acc1D>(queue_,
852  createEligibleModulesListForQuintuplets_workDiv,
854  modules_.const_view<ModulesSoA>(),
855  tripletsDC_->const_view<TripletsOccupancySoA>(),
856  rangesDC_->view());
857 
     // Read back the two scalars written to rangesDC_; they size the collection and the grid below.
858  auto nEligibleT5Modules_buf = cms::alpakatools::make_host_buffer<uint16_t>(queue_);
859  auto nTotalQuintuplets_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
860  auto rangesOccupancy = rangesDC_->view();
861  auto nEligibleT5Modules_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nEligibleT5Modules());
862  auto nTotalQuintuplets_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalQuints());
863  alpaka::memcpy(queue_, nEligibleT5Modules_buf, nEligibleT5Modules_view_d);
864  alpaka::memcpy(queue_, nTotalQuintuplets_buf, nTotalQuintuplets_view_d);
865  alpaka::wait(queue_); // wait for the values before using them
866 
867  auto nEligibleT5Modules = *nEligibleT5Modules_buf.data();
868  auto nTotalQuintuplets = *nTotalQuintuplets_buf.data();
869 
     // First use: create the collection with (nTotalQuintuplets, nLowerModules_) sizes and zero the
     // per-module counters and the isDup / tightCutFlag / partOfPT5 columns.
870  if (!quintupletsDC_) {
871  std::array<int, 2> const quintuplets_sizes{{static_cast<int>(nTotalQuintuplets), static_cast<int>(nLowerModules_)}};
872  quintupletsDC_.emplace(quintuplets_sizes, queue_);
873  auto quintupletsOccupancy = quintupletsDC_->view<QuintupletsOccupancySoA>();
874  auto nQuintuplets_view = cms::alpakatools::make_device_view(
875  queue_, quintupletsOccupancy.nQuintuplets(), quintupletsOccupancy.metadata().size());
876  alpaka::memset(queue_, nQuintuplets_view, 0u);
877  auto totOccupancyQuintuplets_view = cms::alpakatools::make_device_view(
878  queue_, quintupletsOccupancy.totOccupancyQuintuplets(), quintupletsOccupancy.metadata().size());
879  alpaka::memset(queue_, totOccupancyQuintuplets_view, 0u);
880  auto quintuplets = quintupletsDC_->view<QuintupletsSoA>();
881  auto isDup_view = cms::alpakatools::make_device_view(queue_, quintuplets.isDup(), quintuplets.metadata().size());
882  alpaka::memset(queue_, isDup_view, 0u);
883  auto tightCutFlag_view =
884  cms::alpakatools::make_device_view(queue_, quintuplets.tightCutFlag(), quintuplets.metadata().size());
885  alpaka::memset(queue_, tightCutFlag_view, 0u);
886  auto partOfPT5_view =
887  cms::alpakatools::make_device_view(queue_, quintuplets.partOfPT5(), quintuplets.metadata().size());
888  alpaka::memset(queue_, partOfPT5_view, 0u);
889  }
890 
     // The first grid dimension is nEligibleT5Modules, clamped to at least 1.
891  Vec3D const threadsPerBlockQuints{1, 8, 32};
892  Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1};
893  WorkDiv3D const createQuintuplets_workDiv =
894  createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread);
895 
     // Kernel launch; listing lines 898 and 904-905 (functor and two arguments) are missing from this view.
896  alpaka::exec<Acc3D>(queue_,
897  createQuintuplets_workDiv,
899  modules_.const_view<ModulesSoA>(),
900  miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
901  segmentsDC_->const_view<SegmentsSoA>(),
902  tripletsDC_->view<TripletsSoA>(),
903  tripletsDC_->const_view<TripletsOccupancySoA>(),
906  rangesDC_->const_view(),
907  nEligibleT5Modules);
908 
909  Vec3D const threadsPerBlockDupQuint{1, 16, 16};
910  Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1};
911  WorkDiv3D const removeDupQuintupletsAfterBuild_workDiv =
912  createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread);
913 
     // Kernel launch; listing lines 916 and 918-919 are missing from this view.
914  alpaka::exec<Acc3D>(queue_,
915  removeDupQuintupletsAfterBuild_workDiv,
917  modules_.const_view<ModulesSoA>(),
920  rangesDC_->const_view());
921 
922  WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
923 
     // Kernel launch; listing lines 926 and 928 are missing from this view.
924  alpaka::exec<Acc1D>(queue_,
925  addQuintupletRangesToEventExplicit_workDiv,
927  modules_.const_view<ModulesSoA>(),
929  rangesDC_->view());
930 
     // The statement guarded by addObjects_ (listing line 932) is missing from this view.
931  if (addObjects_) {
933  }
934 }
935 
936 void LSTEvent::pixelLineSegmentCleaning(bool no_pls_dupclean) {
937  if (!no_pls_dupclean) {
938  Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16};
939  Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4};
940  WorkDiv3D const checkHitspLS_workDiv =
941  createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread);
942 
943  alpaka::exec<Acc3D>(queue_,
944  checkHitspLS_workDiv,
945  CheckHitspLS{},
946  modules_.const_view<ModulesSoA>(),
947  segmentsDC_->const_view<SegmentsOccupancySoA>(),
948  segmentsDC_->view<SegmentsPixelSoA>(),
949  false);
950  }
951 }
952 
// Body of the pixel-quintuplet building routine (presumably LSTEvent::createPixelQuintuplets; the signature
// line and several interior lines are absent from this listing -- confirm against the original file).
// Visible steps: zero the pixel-quintuplet counters and the track-candidate buffer, stage per-pLS
// superbin/pixel-type data on the host, translate them into per-pLS connected-module (size, index) pairs,
// copy those to the device and launch three kernels (build, duplicate removal, track-candidate filling).
954  if (!pixelQuintupletsDC_) {
     // (listing line 955 is missing here.) The two counters below are zeroed on the device through queue_.
956  auto nPixelQuintuplets_view =
957  cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets());
958  alpaka::memset(queue_, nPixelQuintuplets_view, 0u);
959  auto totOccupancyPixelQuintuplets_view =
960  cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->totOccupancyPixelQuintuplets());
961  alpaka::memset(queue_, totOccupancyPixelQuintuplets_view, 0u);
962  }
963  if (!trackCandidatesDC_) {
     // (listing line 964 is missing here.) The whole track-candidate buffer is zeroed.
965  auto buf = trackCandidatesDC_->buffer();
966  alpaka::memset(queue_, buf, 0u);
967  }
968  SegmentsOccupancy segmentsOccupancy = segmentsDC_->view<SegmentsOccupancySoA>();
969  SegmentsPixelConst segmentsPixel = segmentsDC_->view<SegmentsPixelSoA>();
970 
     // Host staging buffers, sized by n_max_pixel_segments_per_module.
971  auto superbins_buf = cms::alpakatools::make_host_buffer<int[]>(queue_, n_max_pixel_segments_per_module);
972  auto pixelTypes_buf = cms::alpakatools::make_host_buffer<PixelType[]>(queue_, n_max_pixel_segments_per_module);
973 
     // Copies into the staging buffers; the source arguments (listing lines 976 and 980) are missing from this view.
974  alpaka::memcpy(queue_,
975  superbins_buf,
977  alpaka::memcpy(
978  queue_,
979  pixelTypes_buf,
981  auto const* superbins = superbins_buf.data();
982  auto const* pixelTypes = pixelTypes_buf.data();
983 
     // Left uninitialized on purpose: it is filled by the copy below and only read after the wait.
984  unsigned int nInnerSegments;
985  auto nInnerSegments_src_view = cms::alpakatools::make_host_view(nInnerSegments);
986 
987  // Create a sub-view for the device buffer
     // NOTE(review): totalModules and dev_view_nSegments_buf are not referenced by any other line
     // visible in this listing -- confirm whether they can be removed.
988  unsigned int totalModules = nLowerModules_ + 1;
989  auto dev_view_nSegments_buf = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), totalModules);
990  auto dev_view_nSegments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[nLowerModules_]);
991 
992  alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments);
993  alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using
994 
     // Per-pLS connected-module count and start index: filled on the host, then copied to the device.
995  auto connectedPixelSize_host_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nInnerSegments);
996  auto connectedPixelIndex_host_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nInnerSegments);
997  auto connectedPixelSize_dev_buf = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, nInnerSegments);
998  auto connectedPixelIndex_dev_buf = cms::alpakatools::make_device_buffer<unsigned int[]>(queue_, nInnerSegments);
999 
1000  auto* connectedPixelSize_host = connectedPixelSize_host_buf.data();
1001  auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data();
1002 
     // Offsets added to the Pos / Neg map indices below. Each is the (index + size) of the last superbin
     // of the preceding map; the Neg offset additionally includes the Pos offset.
1003  int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::size_superbins - 1] +
1004  pixelMapping_.connectedPixelsSizes[::size_superbins - 1];
1005  int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::size_superbins - 1] +
1006  pixelMapping_.connectedPixelsSizesPos[::size_superbins - 1] + pixelIndexOffsetPos;
1007 
1008  // Loop over # pLS
1009  for (unsigned int i = 0; i < nInnerSegments; i++) {
1010  PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS
1011  int superbin = superbins[i]; // Get superbin for this pixel
     // A pLS with an out-of-range superbin, or a pixel type other than the three handled below,
     // gets size 0 and index 0.
1012  if ((superbin < 0) or (superbin >= (int)size_superbins) or
1013  ((pixelType != PixelType::kHighPt) and (pixelType != PixelType::kLowPtPosCurv) and
1014  (pixelType != PixelType::kLowPtNegCurv))) {
1015  connectedPixelSize_host[i] = 0;
1016  connectedPixelIndex_host[i] = 0;
1017  continue;
1018  }
1019 
1020  // Used pixel type to select correct size-index arrays
1021  switch (pixelType) {
1022  case PixelType::kInvalid:
1023  break;
1024  case PixelType::kHighPt:
1025  // number of connected modules to this pixel
1026  connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin];
1027  // index to get start of connected modules for this superbin in map
1028  connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin];
1029  break;
     // (case label on listing line 1030 is missing from this view; the branch reads the *Pos arrays.)
1031  // number of connected modules to this pixel
1032  connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin];
1033  // index to get start of connected modules for this superbin in map
1034  connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos;
1035  break;
     // (case label on listing line 1036 is missing from this view; the branch reads the *Neg arrays.)
1037  // number of connected modules to this pixel
1038  connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin];
1039  // index to get start of connected modules for this superbin in map
1040  connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg;
1041  break;
1042  }
1043  }
1044 
     // Enqueue the host-to-device copies of the two per-pLS arrays (nInnerSegments elements each).
1045  alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments);
1046  alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments);
1047 
1048  Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16};
1049  Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1};
1050  WorkDiv3D const createPixelQuintupletsFromMap_workDiv =
1051  createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread);
1052 
     // Kernel launch; the kernel functor argument (listing line 1055) is missing from this view.
1053  alpaka::exec<Acc3D>(queue_,
1054  createPixelQuintupletsFromMap_workDiv,
1056  modules_.const_view<ModulesSoA>(),
1057  modules_.const_view<ModulesPixelSoA>(),
1058  miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
1059  segmentsDC_->const_view<SegmentsSoA>(),
1060  segmentsDC_->view<SegmentsPixelSoA>(),
1061  tripletsDC_->view<TripletsSoA>(),
1062  quintupletsDC_->view<QuintupletsSoA>(),
1063  quintupletsDC_->const_view<QuintupletsOccupancySoA>(),
1064  pixelQuintupletsDC_->view(),
1065  connectedPixelSize_dev_buf.data(),
1066  connectedPixelIndex_dev_buf.data(),
1067  nInnerSegments,
1068  rangesDC_->const_view());
1069 
1070  Vec3D const threadsPerBlockDupPix{1, 16, 16};
1071  Vec3D const blocksPerGridDupPix{1, max_blocks, 1};
1072  WorkDiv3D const removeDupPixelQuintupletsFromMap_workDiv =
1073  createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread);
1074 
     // Kernel launch; the kernel functor argument (listing line 1077) is missing from this view.
1075  alpaka::exec<Acc3D>(queue_,
1076  removeDupPixelQuintupletsFromMap_workDiv,
1078  pixelQuintupletsDC_->view());
1079 
1080  WorkDiv1D const addpT5asTrackCandidate_workDiv = createWorkDiv<Vec1D>({1}, {256}, {1});
1081 
     // Kernel launch; listing lines 1084-1085 (functor and first argument) are missing from this view.
1082  alpaka::exec<Acc1D>(queue_,
1083  addpT5asTrackCandidate_workDiv,
1086  pixelQuintupletsDC_->const_view(),
1087  trackCandidatesDC_->view(),
1088  segmentsDC_->const_view<SegmentsPixelSoA>(),
1089  rangesDC_->const_view());
1090 
     // Diagnostic only: copies the pixel-quintuplet counter back to the host and prints it.
1091 #ifdef WARNINGS
1092  auto nPixelQuintuplets_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1093 
1094  alpaka::memcpy(queue_,
1095  nPixelQuintuplets_buf,
1096  cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets()));
1097  alpaka::wait(queue_); // wait to get the value before using it
1098 
1099  std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl;
1100 #endif
1101 }
1102 
// Body of the host-side mini-doublet counting routine (presumably LSTEvent::addMiniDoubletsToEventExplicit;
// the signature line is absent from this listing -- confirm against the original file).
// Copies the per-module mini-doublet counts, module subdetector and layer ids, and hit ranges for the first
// nLowerModules_ modules to the host, waits, then visits every module that has mini-doublets and a hit
// range whose first entry is not -1.
1104  auto nMDsCPU_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nLowerModules_);
1105  auto mdsOccupancy = miniDoubletsDC_->const_view<MiniDoubletsOccupancySoA>();
1106  auto nMDs_view =
1107  cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), nLowerModules_); // exclude pixel part
1108  alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_);
1109 
1110  auto modules = modules_.const_view<ModulesSoA>();
1111 
1112  // FIXME: replace by ES host data
1113  auto module_subdets_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1114  auto module_subdets_view =
1115  cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules
1116  alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_);
1117 
1118  auto module_layers_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1119  auto module_layers_view =
1120  cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules
1121  alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_);
1122 
1123  auto module_hitRanges_buf = cms::alpakatools::make_host_buffer<ArrayIx2[]>(queue_, nLowerModules_);
1124  auto hits = hitsDC_->view<HitsRangesSoA>();
1125  auto hitRanges_view =
1126  cms::alpakatools::make_device_view(queue_, hits.hitRanges(), nLowerModules_); // only lower modules
1127  alpaka::memcpy(queue_, module_hitRanges_buf, hitRanges_view, nLowerModules_);
1128 
1129  alpaka::wait(queue_); // wait for inputs before using them
1130 
1131  auto const* nMDsCPU = nMDsCPU_buf.data();
1132  auto const* module_subdets = module_subdets_buf.data();
1133  auto const* module_layers = module_layers_buf.data();
1134  auto const* module_hitRanges = module_hitRanges_buf.data();
1135 
1136  for (unsigned int i = 0; i < nLowerModules_; i++) {
1137  if (!(nMDsCPU[i] == 0 or module_hitRanges[i][0] == -1)) {
     // The statements of both branches (listing lines 1139 and 1141) are missing from this view;
     // presumably they add nMDsCPU[i] to the barrel / endcap per-layer counters -- confirm.
1138  if (module_subdets[i] == Barrel) {
1140  } else {
1142  }
1143  }
1144  }
1145 }
1146 
// Body of the host-side segment counting routine (presumably LSTEvent::addSegmentsToEventExplicit; the
// signature line is absent from this listing -- confirm against the original file).
// Copies the per-module segment counts and the module subdetector / layer ids for the first nLowerModules_
// modules to the host, waits, then adds each non-zero count to the barrel or endcap per-layer counter.
1148  auto nSegmentsCPU_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nLowerModules_);
1149  auto nSegments_buf = cms::alpakatools::make_device_view(
1150  queue_, segmentsDC_->const_view<SegmentsOccupancySoA>().nSegments(), nLowerModules_);
1151  alpaka::memcpy(queue_, nSegmentsCPU_buf, nSegments_buf, nLowerModules_);
1152 
1153  auto modules = modules_.const_view<ModulesSoA>();
1154 
1155  // FIXME: replace by ES host data
1156  auto module_subdets_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1157  auto module_subdets_view =
1158  cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules
1159  alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_);
1160 
1161  auto module_layers_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1162  auto module_layers_view =
1163  cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules
1164  alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_);
1165 
1166  alpaka::wait(queue_); // wait for inputs before using them
1167 
1168  auto const* nSegmentsCPU = nSegmentsCPU_buf.data();
1169  auto const* module_subdets = module_subdets_buf.data();
1170  auto const* module_layers = module_layers_buf.data();
1171 
     // The module layer id minus one is used as the index into the per-layer counters.
1172  for (unsigned int i = 0; i < nLowerModules_; i++) {
1173  if (!(nSegmentsCPU[i] == 0)) {
1174  if (module_subdets[i] == Barrel) {
1175  n_segments_by_layer_barrel_[module_layers[i] - 1] += nSegmentsCPU[i];
1176  } else {
1177  n_segments_by_layer_endcap_[module_layers[i] - 1] += nSegmentsCPU[i];
1178  }
1179  }
1180  }
1181 }
1182 
1184  auto quintupletsOccupancy = quintupletsDC_->const_view<QuintupletsOccupancySoA>();
1185  auto nQuintuplets_view =
1186  cms::alpakatools::make_device_view(queue_, quintupletsOccupancy.nQuintuplets(), nLowerModules_);
1187  auto nQuintupletsCPU_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nLowerModules_);
1188  alpaka::memcpy(queue_, nQuintupletsCPU_buf, nQuintuplets_view);
1189 
1190  auto modules = modules_.const_view<ModulesSoA>();
1191 
1192  // FIXME: replace by ES host data
1193  auto module_subdets_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1194  auto module_subdets_view = cms::alpakatools::make_device_view(queue_, modules.subdets(), modules.metadata().size());
1195  alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nModules_);
1196 
1197  auto module_layers_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1198  auto module_layers_view =
1199  cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules
1200  alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_);
1201 
1202  auto module_quintupletModuleIndices_buf = cms::alpakatools::make_host_buffer<int[]>(queue_, nLowerModules_);
1203  auto rangesOccupancy = rangesDC_->view();
1204  auto quintupletModuleIndices_view_d =
1205  cms::alpakatools::make_device_view(queue_, rangesOccupancy.quintupletModuleIndices(), nLowerModules_);
1206  alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, quintupletModuleIndices_view_d);
1207 
1208  alpaka::wait(queue_); // wait for inputs before using them
1209 
1210  auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data();
1211  auto const* module_subdets = module_subdets_buf.data();
1212  auto const* module_layers = module_layers_buf.data();
1213  auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data();
1214 
1215  for (uint16_t i = 0; i < nLowerModules_; i++) {
1216  if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) {
1217  if (module_subdets[i] == Barrel) {
1218  n_quintuplets_by_layer_barrel_[module_layers[i] - 1] += nQuintupletsCPU[i];
1219  } else {
1220  n_quintuplets_by_layer_endcap_[module_layers[i] - 1] += nQuintupletsCPU[i];
1221  }
1222  }
1223  }
1224 }
1225 
// Body of the host-side triplet counting routine (presumably LSTEvent::addTripletsToEventExplicit; the
// signature line is absent from this listing -- confirm against the original file).
// Copies the per-module triplet counts and the module subdetector / layer ids for the first nLowerModules_
// modules to the host, waits, then adds each non-zero count to the barrel or endcap per-layer counter.
1227  auto tripletsOccupancy = tripletsDC_->const_view<TripletsOccupancySoA>();
1228  auto nTriplets_view = cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), nLowerModules_);
1229  auto nTripletsCPU_buf = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, nLowerModules_);
1230  alpaka::memcpy(queue_, nTripletsCPU_buf, nTriplets_view);
1231 
1232  auto modules = modules_.const_view<ModulesSoA>();
1233 
1234  // FIXME: replace by ES host data
1235  auto module_subdets_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1236  auto module_subdets_view =
1237  cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules
1238  alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_);
1239 
1240  auto module_layers_buf = cms::alpakatools::make_host_buffer<short[]>(queue_, nLowerModules_);
1241  auto module_layers_view =
1242  cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules
1243  alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_);
1244 
1245  alpaka::wait(queue_); // wait for inputs before using them
1246 
1247  auto const* nTripletsCPU = nTripletsCPU_buf.data();
1248  auto const* module_subdets = module_subdets_buf.data();
1249  auto const* module_layers = module_layers_buf.data();
1250 
     // The module layer id minus one is used as the index into the per-layer counters.
1251  for (uint16_t i = 0; i < nLowerModules_; i++) {
1252  if (nTripletsCPU[i] != 0) {
1253  if (module_subdets[i] == Barrel) {
1254  n_triplets_by_layer_barrel_[module_layers[i] - 1] += nTripletsCPU[i];
1255  } else {
1256  n_triplets_by_layer_endcap_[module_layers[i] - 1] += nTripletsCPU[i];
1257  }
1258  }
1259  }
1260 }
1261 
// Body of the total mini-doublet getter (presumably LSTEvent::getNumberOfMiniDoublets; the signature line
// is absent from this listing). Returns the sum of all barrel and endcap per-layer mini-doublet counters.
1263  unsigned int miniDoublets = 0;
1264  for (auto& it : n_minidoublets_by_layer_barrel_) {
1265  miniDoublets += it;
1266  }
1267  for (auto& it : n_minidoublets_by_layer_endcap_) {
1268  miniDoublets += it;
1269  }
1270 
1271  return miniDoublets;
1272 }
1273 
// Returns a mini-doublet count for the given layer index. For layer == 6 only the barrel counter is read.
// NOTE(review): the layer == 6 branch indexes n_minidoublets_by_layer_barrel_ at 6 -- confirm that the
// container has more than 6 entries, otherwise this is an out-of-range read.
// The else-branch statement (listing line 1278) is missing from this view.
1274 unsigned int LSTEvent::getNumberOfMiniDoubletsByLayer(unsigned int layer) {
1275  if (layer == 6)
1276  return n_minidoublets_by_layer_barrel_[layer];
1277  else
1279 }
1280 
1281 unsigned int LSTEvent::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) {
1282  return n_minidoublets_by_layer_barrel_[layer];
1283 }
1284 
1285 unsigned int LSTEvent::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) {
1286  return n_minidoublets_by_layer_endcap_[layer];
1287 }
1288 
// Body of the total segment getter (presumably LSTEvent::getNumberOfSegments; the signature line is absent
// from this listing). Returns the sum of all barrel and endcap per-layer segment counters.
1290  unsigned int segments = 0;
1291  for (auto& it : n_segments_by_layer_barrel_) {
1292  segments += it;
1293  }
1294  for (auto& it : n_segments_by_layer_endcap_) {
1295  segments += it;
1296  }
1297 
1298  return segments;
1299 }
1300 
// Returns a segment count for the given layer index. For layer == 6 only the barrel counter is read.
// NOTE(review): the layer == 6 branch indexes n_segments_by_layer_barrel_ at 6 -- confirm that the
// container has more than 6 entries, otherwise this is an out-of-range read.
// The else-branch statement (listing line 1305) is missing from this view.
1301 unsigned int LSTEvent::getNumberOfSegmentsByLayer(unsigned int layer) {
1302  if (layer == 6)
1303  return n_segments_by_layer_barrel_[layer];
1304  else
1306 }
1307 
1308 unsigned int LSTEvent::getNumberOfSegmentsByLayerBarrel(unsigned int layer) {
1309  return n_segments_by_layer_barrel_[layer];
1310 }
1311 
1312 unsigned int LSTEvent::getNumberOfSegmentsByLayerEndcap(unsigned int layer) {
1313  return n_segments_by_layer_endcap_[layer];
1314 }
1315 
// Body of the total triplet getter (presumably LSTEvent::getNumberOfTriplets; the signature line is absent
// from this listing). Returns the sum of all barrel and endcap per-layer triplet counters.
1317  unsigned int triplets = 0;
1318  for (auto& it : n_triplets_by_layer_barrel_) {
1319  triplets += it;
1320  }
1321  for (auto& it : n_triplets_by_layer_endcap_) {
1322  triplets += it;
1323  }
1324 
1325  return triplets;
1326 }
1327 
// Returns a triplet count for the given layer index. For layer == 6 only the barrel counter is read.
// NOTE(review): the layer == 6 branch indexes n_triplets_by_layer_barrel_ at 6 -- confirm that the
// container has more than 6 entries, otherwise this is an out-of-range read.
// The else-branch statement (listing line 1332) is missing from this view.
1328 unsigned int LSTEvent::getNumberOfTripletsByLayer(unsigned int layer) {
1329  if (layer == 6)
1330  return n_triplets_by_layer_barrel_[layer];
1331  else
1333 }
1334 
1335 unsigned int LSTEvent::getNumberOfTripletsByLayerBarrel(unsigned int layer) {
1336  return n_triplets_by_layer_barrel_[layer];
1337 }
1338 
1339 unsigned int LSTEvent::getNumberOfTripletsByLayerEndcap(unsigned int layer) {
1340  return n_triplets_by_layer_endcap_[layer];
1341 }
1342 
// Body of the pixel-triplet count getter (presumably LSTEvent::getNumberOfPixelTriplets; the signature line
// is absent from this listing). Copies the nPixelTriplets scalar from the device collection into a host
// buffer and returns it.
1344  auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1345 
1346  alpaka::memcpy(
1347  queue_, nPixelTriplets_buf_h, cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets()));
     // NOTE(review): listing line 1348 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1349 
1350  return *nPixelTriplets_buf_h.data();
1351 }
1352 
// Body of the pixel-quintuplet count getter (presumably LSTEvent::getNumberOfPixelQuintuplets; the signature
// line is absent from this listing). Copies the nPixelQuintuplets scalar from the device collection into a
// host buffer and returns it.
1354  auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1355 
1356  alpaka::memcpy(queue_,
1357  nPixelQuintuplets_buf_h,
1358  cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets()));
     // NOTE(review): listing line 1359 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1360 
1361  return *nPixelQuintuplets_buf_h.data();
1362 }
1363 
// Body of the total quintuplet getter (presumably LSTEvent::getNumberOfQuintuplets; the signature line is
// absent from this listing). Returns the sum of all barrel and endcap per-layer quintuplet counters.
1365  unsigned int quintuplets = 0;
1366  for (auto& it : n_quintuplets_by_layer_barrel_) {
1367  quintuplets += it;
1368  }
1369  for (auto& it : n_quintuplets_by_layer_endcap_) {
1370  quintuplets += it;
1371  }
1372 
1373  return quintuplets;
1374 }
1375 
// Returns a quintuplet count for the given layer index. For layer == 6 only the barrel counter is read.
// NOTE(review): the layer == 6 branch indexes n_quintuplets_by_layer_barrel_ at 6 -- confirm that the
// container has more than 6 entries, otherwise this is an out-of-range read.
// The else-branch statement (listing line 1380) is missing from this view.
1376 unsigned int LSTEvent::getNumberOfQuintupletsByLayer(unsigned int layer) {
1377  if (layer == 6)
1378  return n_quintuplets_by_layer_barrel_[layer];
1379  else
1381 }
1382 
1383 unsigned int LSTEvent::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) {
1384  return n_quintuplets_by_layer_barrel_[layer];
1385 }
1386 
1387 unsigned int LSTEvent::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) {
1388  return n_quintuplets_by_layer_endcap_[layer];
1389 }
1390 
// Body of the track-candidate count getter (presumably LSTEvent::getNumberOfTrackCandidates; the signature
// line is absent from this listing). Copies the nTrackCandidates scalar from the device collection into a
// host buffer and returns it.
1392  auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1393 
1394  alpaka::memcpy(queue_,
1395  nTrackCandidates_buf_h,
1396  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates()));
     // NOTE(review): listing line 1397 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1398 
1399  return *nTrackCandidates_buf_h.data();
1400 }
1401 
// Body of the pT5 track-candidate count getter (presumably LSTEvent::getNumberOfPT5TrackCandidates; the
// signature line is absent from this listing). Copies the nTrackCandidatespT5 scalar from the device
// collection into a host buffer and returns it.
1403  auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1404 
1405  alpaka::memcpy(queue_,
1406  nTrackCandidatesPT5_buf_h,
1407  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT5()));
     // NOTE(review): listing line 1408 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1409 
1410  return *nTrackCandidatesPT5_buf_h.data();
1411 }
1412 
// Body of the pT3 track-candidate count getter (presumably LSTEvent::getNumberOfPT3TrackCandidates; the
// signature line is absent from this listing). Copies the nTrackCandidatespT3 scalar from the device
// collection into a host buffer and returns it.
1414  auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1415 
1416  alpaka::memcpy(queue_,
1417  nTrackCandidatesPT3_buf_h,
1418  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT3()));
     // NOTE(review): listing line 1419 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1420 
1421  return *nTrackCandidatesPT3_buf_h.data();
1422 }
1423 
// Body of the pLS track-candidate count getter (presumably LSTEvent::getNumberOfPLSTrackCandidates; the
// signature line is absent from this listing). Copies the nTrackCandidatespLS scalar from the device
// collection into a host buffer and returns it.
1425  auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1426 
1427  alpaka::memcpy(queue_,
1428  nTrackCandidatesPLS_buf_h,
1429  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespLS()));
     // NOTE(review): listing line 1430 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1431 
1432  return *nTrackCandidatesPLS_buf_h.data();
1433 }
1434 
// Body of a track-candidate count getter (presumably LSTEvent::getNumberOfPixelTrackCandidates; the
// signature line is absent from this listing). Copies the nTrackCandidates and nTrackCandidatesT5 scalars
// from the device collection to the host and returns their difference (total minus T5).
1436  auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1437  auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1438 
1439  alpaka::memcpy(queue_,
1440  nTrackCandidates_buf_h,
1441  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates()));
1442  alpaka::memcpy(queue_,
1443  nTrackCandidatesT5_buf_h,
1444  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5()));
     // NOTE(review): listing line 1445 is missing from this view; presumably it waits on queue_ so both
     // copies have completed before the values are read below -- confirm.
1446 
1447  return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data());
1448 }
1449 
// Body of the T5 track-candidate count getter (presumably LSTEvent::getNumberOfT5TrackCandidates; the
// signature line is absent from this listing). Copies the nTrackCandidatesT5 scalar from the device
// collection into a host buffer and returns it.
1451  auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1452 
1453  alpaka::memcpy(queue_,
1454  nTrackCandidatesT5_buf_h,
1455  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5()));
     // NOTE(review): listing line 1456 is missing from this view; presumably it waits on queue_ so the
     // copy has completed before the value is read below -- confirm.
1457 
1458  return *nTrackCandidatesT5_buf_h.data();
1459 }
1460 
// Returns a const view of the TSoA layout of the hit collection.
// When TDev is DevHost the view is taken directly from hitsDC_ and nothing is copied.
// Otherwise a host copy (hitsHC_) is created on the first call and reused afterwards:
//   - inCMSSW == true : a host collection of the same sizes is allocated and only the idxs column is copied;
//   - inCMSSW == false: see the missing line noted below.
// If `sync` is true, queue_ is waited on -- but only in the call that creates hitsHC_.
1461 template <typename TSoA, typename TDev>
1462 typename TSoA::ConstView LSTEvent::getHits(bool inCMSSW, bool sync) {
1463  if constexpr (std::is_same_v<TDev, DevHost>) {
1464  return hitsDC_->const_view<TSoA>();
1465  } else {
1466  if (!hitsHC_) {
1467  if (inCMSSW) {
1468  auto hits_d = hitsDC_->view<HitsSoA>();
1469  auto nHits = hits_d.metadata().size();
1470  std::array<int, 2> const hits_sizes{{static_cast<int>(nHits), static_cast<int>(nModules_)}};
1471  hitsHC_.emplace(hits_sizes, queue_);
1472  auto hits_h = hitsHC_->view<HitsSoA>();
1473  auto idxs_h = cms::alpakatools::make_host_view(hits_h.idxs(), nHits);
1474  auto idxs_d = cms::alpakatools::make_device_view(queue_, hits_d.idxs(), nHits);
1475  alpaka::memcpy(queue_, idxs_h, idxs_d);
1476  } else {
     // (listing line 1477 is missing from this view; presumably it emplaces hitsHC_ from a
     // device-to-host copy of *hitsDC_ -- confirm.)
1478  queue_, *hitsDC_));
1479  }
1480  if (sync)
1481  alpaka::wait(queue_); // host consumers expect filled data
1482  }
1483  return hitsHC_->const_view<TSoA>();
1484  }
1485 }
// Explicit instantiations for the two hit layouts.
1486 template HitsConst LSTEvent::getHits<HitsSoA>(bool, bool);
1487 template HitsRangesConst LSTEvent::getHits<HitsRangesSoA>(bool, bool);
1488 
// Accessor for the object-ranges collection (presumably LSTEvent::getRanges(bool sync); the signature line,
// listing line 1490, is missing from this view).
// When TDev is DevHost the const view is taken directly from rangesDC_. Otherwise a host copy (rangesHC_)
// is created on the first call and reused afterwards; if `sync` is true, queue_ is waited on in the call
// that creates the copy.
1489 template <typename TDev>
1491  if constexpr (std::is_same_v<TDev, DevHost>) {
1492  return rangesDC_->const_view();
1493  } else {
1494  if (!rangesHC_) {
     // The emplace argument (listing line 1496) is missing from this view.
1495  rangesHC_.emplace(
1497  if (sync)
1498  alpaka::wait(queue_); // host consumers expect filled data
1499  }
1500  return rangesHC_->const_view();
1501  }
1502 }
1503 template ObjectRangesConst LSTEvent::getRanges<>(bool);
1504 
// Returns a const view of the TSoA layout of the mini-doublet collection.
// When TDev is DevHost the view is taken directly from miniDoubletsDC_. Otherwise a host copy
// (miniDoubletsHC_) is created on the first call and reused afterwards; if `sync` is true, queue_ is
// waited on in the call that creates the copy.
1505 template <typename TSoA, typename TDev>
1506 typename TSoA::ConstView LSTEvent::getMiniDoublets(bool sync) {
1507  if constexpr (std::is_same_v<TDev, DevHost>) {
1508  return miniDoubletsDC_->const_view<TSoA>();
1509  } else {
1510  if (!miniDoubletsHC_) {
     // Part of the emplace argument (listing lines 1512-1513) is missing from this view.
1511  miniDoubletsHC_.emplace(
1514  *miniDoubletsDC_));
1515  if (sync)
1516  alpaka::wait(queue_); // host consumers expect filled data
1517  }
1518  return miniDoubletsHC_->const_view<TSoA>();
1519  }
1520 }
// Explicit instantiations for the two mini-doublet layouts.
1521 template MiniDoubletsConst LSTEvent::getMiniDoublets<MiniDoubletsSoA>(bool);
1522 template MiniDoubletsOccupancyConst LSTEvent::getMiniDoublets<MiniDoubletsOccupancySoA>(bool);
1523 
// Returns a const view of the TSoA layout of the segments collection.
//  - TDev == DevHost: the view is taken directly from segmentsDC_.
//  - otherwise: segmentsHC_ is created from *segmentsDC_ (using queue_) on the first
//    call and the view of segmentsHC_ is returned on this and every later call.
// sync: when true, waits on queue_ right after segmentsHC_ has been created.
// NOTE(review): the wait is inside the `!segmentsHC_` branch, so a later call with
// sync=true on an already-created collection does not wait - confirm this is intended.
1524 template <typename TSoA, typename TDev>
1525 typename TSoA::ConstView LSTEvent::getSegments(bool sync) {
1526  if constexpr (std::is_same_v<TDev, DevHost>) {
1527  return segmentsDC_->const_view<TSoA>();
1528  } else {
1529  if (!segmentsHC_) {
      // NOTE(review): listing lines 1531-1532 (start of the emplace argument) are
      // absent from this extract; only its tail `queue_, *segmentsDC_` is visible.
1530  segmentsHC_.emplace(
1533  queue_, *segmentsDC_));
1534  if (sync)
1535  alpaka::wait(queue_); // host consumers expect filled data
1536  }
1537  return segmentsHC_->const_view<TSoA>();
1538  }
1539 }
// Explicit instantiations for the three layouts named here; TDev keeps its default.
1540 template SegmentsConst LSTEvent::getSegments<SegmentsSoA>(bool);
1541 template SegmentsOccupancyConst LSTEvent::getSegments<SegmentsOccupancySoA>(bool);
1542 template SegmentsPixelConst LSTEvent::getSegments<SegmentsPixelSoA>(bool);
1543 
// Returns a const view of the TSoA layout of the triplets collection.
//  - TDev == DevHost: the view is taken directly from tripletsDC_.
//  - otherwise: tripletsHC_ is created from *tripletsDC_ (using queue_) on the first
//    call and the view of tripletsHC_ is returned on this and every later call.
// sync: when true, waits on queue_ right after tripletsHC_ has been created.
// NOTE(review): the wait is inside the `!tripletsHC_` branch, so a later call with
// sync=true on an already-created collection does not wait - confirm this is intended.
1544 template <typename TSoA, typename TDev>
1545 typename TSoA::ConstView LSTEvent::getTriplets(bool sync) {
1546  if constexpr (std::is_same_v<TDev, DevHost>) {
1547  return tripletsDC_->const_view<TSoA>();
1548  } else {
1549  if (!tripletsHC_) {
      // NOTE(review): listing line 1551 (start of the emplace argument) is absent
      // from this extract; only its tail `queue_, *tripletsDC_` is visible.
1550  tripletsHC_.emplace(
1552  queue_, *tripletsDC_));
1553 
1554  if (sync)
1555  alpaka::wait(queue_); // host consumers expect filled data
1556  }
1557  }
      // Unlike getMiniDoublets/getSegments, the host-side return sits after the
      // if-constexpr; the DevHost branch has already returned above.
1558  return tripletsHC_->const_view<TSoA>();
1559 }
// Explicit instantiations for the two layouts named here; TDev keeps its default.
1560 template TripletsConst LSTEvent::getTriplets<TripletsSoA>(bool);
1561 template TripletsOccupancyConst LSTEvent::getTriplets<TripletsOccupancySoA>(bool);
1562 
// Returns a const view of the TSoA layout of the quintuplets collection.
//  - TDev == DevHost: the view is taken directly from quintupletsDC_.
//  - otherwise: quintupletsHC_ is created from *quintupletsDC_ (using queue_) on the
//    first call and the view of quintupletsHC_ is returned on this and every later call.
// sync: when true, waits on queue_ right after quintupletsHC_ has been created.
// NOTE(review): the wait is inside the `!quintupletsHC_` branch, so a later call with
// sync=true on an already-created collection does not wait - confirm this is intended.
1563 template <typename TSoA, typename TDev>
1564 typename TSoA::ConstView LSTEvent::getQuintuplets(bool sync) {
1565  if constexpr (std::is_same_v<TDev, DevHost>) {
1566  return quintupletsDC_->const_view<TSoA>();
1567  } else {
1568  if (!quintupletsHC_) {
      // NOTE(review): listing line 1570 (start of the emplace argument) is absent
      // from this extract; only its tail `queue_, *quintupletsDC_` is visible.
1569  quintupletsHC_.emplace(
1571  queue_, *quintupletsDC_));
1572 
1573  if (sync)
1574  alpaka::wait(queue_); // host consumers expect filled data
1575  }
1576  }
      // The host-side return sits after the if-constexpr; the DevHost branch has
      // already returned above.
1577  return quintupletsHC_->const_view<TSoA>();
1578 }
// Explicit instantiations for the two layouts named here; TDev keeps its default.
1579 template QuintupletsConst LSTEvent::getQuintuplets<QuintupletsSoA>(bool);
1580 template QuintupletsOccupancyConst LSTEvent::getQuintuplets<QuintupletsOccupancySoA>(bool);
1581 
// Accessor returning a const view of the pixel-triplets collection.
// NOTE(review): listing line 1583 (the function signature) is absent from this
// extract; the explicit instantiation at the end of this block names the function
// LSTEvent::getPixelTriplets<>(bool) with return type PixelTripletsConst.
//  - TDev == DevHost: the view is taken directly from pixelTripletsDC_.
//  - otherwise: the view of pixelTripletsHC_ is returned.
// sync: when true, waits on queue_ inside the `!pixelTripletsHC_` branch.
// NOTE(review): a later call with sync=true once pixelTripletsHC_ exists does not
// wait - confirm this is intended.
1582 template <typename TDev>
1584  if constexpr (std::is_same_v<TDev, DevHost>) {
1585  return pixelTripletsDC_->const_view();
1586  } else {
1587  if (!pixelTripletsHC_) {
      // NOTE(review): listing lines 1588-1589 are absent from this extract;
      // presumably they create pixelTripletsHC_ from the device collection, as the
      // sibling getters do - verify against the full source.
1590 
1591  if (sync)
1592  alpaka::wait(queue_); // host consumers expect filled data
1593  }
1594  }
      // The host-side return sits after the if-constexpr; the DevHost branch has
      // already returned above.
1595  return pixelTripletsHC_->const_view();
1596 }
// Explicit instantiation using the default template argument for TDev.
1597 template PixelTripletsConst LSTEvent::getPixelTriplets<>(bool);
1598 
// Accessor returning a const view of the pixel-quintuplets collection.
// NOTE(review): listing line 1600 (the function signature) is absent from this
// extract; the explicit instantiation at the end of this block names the function
// LSTEvent::getPixelQuintuplets<>(bool) with return type PixelQuintupletsConst.
//  - TDev == DevHost: the view is taken directly from pixelQuintupletsDC_.
//  - otherwise: pixelQuintupletsHC_ is created on the first call and its view is
//    returned on this and every later call.
// sync: when true, waits on queue_ right after pixelQuintupletsHC_ has been created.
// NOTE(review): the wait is inside the `!pixelQuintupletsHC_` branch, so a later call
// with sync=true on an already-created collection does not wait - confirm this is intended.
1599 template <typename TDev>
1601  if constexpr (std::is_same_v<TDev, DevHost>) {
1602  return pixelQuintupletsDC_->const_view();
1603  } else {
1604  if (!pixelQuintupletsHC_) {
      // NOTE(review): listing lines 1606-1607 (the emplace argument) are absent
      // from this extract.
1605  pixelQuintupletsHC_.emplace(
1608 
1609  if (sync)
1610  alpaka::wait(queue_); // host consumers expect filled data
1611  }
1612  }
      // The host-side return sits after the if-constexpr; the DevHost branch has
      // already returned above.
1613  return pixelQuintupletsHC_->const_view();
1614 }
// Explicit instantiation using the default template argument for TDev.
1615 template PixelQuintupletsConst LSTEvent::getPixelQuintuplets<>(bool);
1616 
// Returns a reference to the const view of the host track-candidates collection
// (trackCandidatesHC_), creating and filling it from trackCandidatesDC_ on the
// first call. Later calls return the view of the existing host collection.
//
// inCMSSW: when true, the copies of logicalLayers, directObjectIndices and
//          objectIndices are skipped; hitIndices, pixelSeedIndex and
//          trackCandidateType are copied in both cases.
// sync:    when true, waits on queue_ after the copies have been enqueued.
// NOTE(review): that final wait is inside the `!trackCandidatesHC_` branch, so a
// later call with sync=true on an already-created collection does not wait -
// confirm this is intended.
1617 const TrackCandidatesConst& LSTEvent::getTrackCandidates(bool inCMSSW, bool sync) {
1618  if (!trackCandidatesHC_) {
1619  // Get nTrackCanHost parameter to initialize host based instance
1620  auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int>(queue_);
1621  alpaka::memcpy(queue_,
1622  nTrackCanHost_buf_h,
1623  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates()));
1624  alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable
1625 
      // The host collection is sized by the count just read back from the device.
1626  auto const nTrackCanHost = *nTrackCanHost_buf_h.data();
1627  trackCandidatesHC_.emplace(nTrackCanHost, queue_);
1628 
      // The count is written directly on the host side; the per-candidate columns
      // below are copied from the device, nTrackCanHost entries each (hitIndices:
      // Params_pT5::kHits values per candidate).
1629  (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost;
      // NOTE(review): listing line 1633 (opening of the source-view argument) is
      // absent from this extract.
1630  alpaka::memcpy(queue_,
1631  cms::alpakatools::make_host_view((*trackCandidatesHC_)->hitIndices()->data(),
1632  Params_pT5::kHits * nTrackCanHost),
1634  queue_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost));
1635  alpaka::memcpy(queue_,
1636  cms::alpakatools::make_host_view((*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost),
1637  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost));
      // These three columns are copied only when not running inside CMSSW.
1638  if (not inCMSSW) {
      // NOTE(review): listing line 1642 (opening of the source-view argument) is
      // absent from this extract. Params_pT5::kLayers values per candidate.
1639  alpaka::memcpy(queue_,
1640  cms::alpakatools::make_host_view((*trackCandidatesHC_)->logicalLayers()->data(),
1641  Params_pT5::kLayers * nTrackCanHost),
1643  queue_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost));
1644  alpaka::memcpy(
1645  queue_,
1646  cms::alpakatools::make_host_view((*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost),
1647  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost));
      // NOTE(review): listing line 1651 (opening of the source-view argument) is
      // absent from this extract. Two objectIndices values per candidate.
1648  alpaka::memcpy(
1649  queue_,
1650  cms::alpakatools::make_host_view((*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost),
1652  queue_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost));
1653  }
1654  alpaka::memcpy(
1655  queue_,
1656  cms::alpakatools::make_host_view((*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost),
1657  cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost));
1658  if (sync)
1659  alpaka::wait(queue_); // host consumers expect filled data
1660  }
1661  return trackCandidatesHC_.value().const_view();
1662 }
1663 
// Returns a const view of the TSoA layout of the modules collection.
//  - TDev == DevHost: the view is taken directly from modules_ (accessed with `.`,
//    unlike the other getters, which dereference a ...DC_ member with `->`).
//  - otherwise: modulesHC_ is created from modules_ (using queue_) on the first call
//    and the view of modulesHC_ is returned on this and every later call.
// sync: when true, waits on queue_ right after modulesHC_ has been created.
// NOTE(review): the wait is inside the `!modulesHC_` branch, so a later call with
// sync=true on an already-created collection does not wait - confirm this is intended.
1664 template <typename TSoA, typename TDev>
1665 typename TSoA::ConstView LSTEvent::getModules(bool sync) {
1666  if constexpr (std::is_same_v<TDev, DevHost>) {
1667  return modules_.const_view<TSoA>();
1668  } else {
1669  if (!modulesHC_) {
      // NOTE(review): listing line 1671 (start of the emplace argument) is absent
      // from this extract; only its tail `queue_, modules_` is visible.
1670  modulesHC_.emplace(
1672  queue_, modules_));
1673  if (sync)
1674  alpaka::wait(queue_); // host consumers expect filled data
1675  }
1676  return modulesHC_->const_view<TSoA>();
1677  }
1678 }
// Explicit instantiations for the two layouts named here; TDev keeps its default.
1679 template ModulesConst LSTEvent::getModules<ModulesSoA>(bool);
1680 template ModulesPixelConst LSTEvent::getModules<ModulesPixelSoA>(bool);
constexpr unsigned int n_max_pixel_md_per_modules
Definition: Common.h:33
unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer)
constexpr unsigned int n_max_nonpixel_track_candidates
Definition: Common.h:39
PixelQuintupletsConst getPixelQuintuplets(bool sync=true)
const TrackCandidatesConst & getTrackCandidates(bool inCMSSW=false, bool sync=true)
std::optional< TripletsHostCollection > tripletsHC_
Definition: LSTEvent.h:66
unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer)
TSoA::ConstView getQuintuplets(bool sync=true)
std::array< unsigned int, 6 > n_triplets_by_layer_barrel_
Definition: LSTEvent.h:44
ModulesSoALayout<> ModulesSoA
Definition: ModulesSoA.h:43
constexpr unsigned int max_blocks
Definition: Common.h:28
TSoA::ConstView getTriplets(bool sync=true)
ModulesDeviceCollection const & modules_
Definition: LSTEvent.h:77
std::optional< PixelQuintupletsDeviceCollection > pixelQuintupletsDC_
Definition: LSTEvent.h:59
std::optional< PixelTripletsHostCollection > pixelTripletsHC_
Definition: LSTEvent.h:70
uint32_t Idx
Definition: config.h:14
::PortableCollection< T, Device > PortableCollection
std::optional< TrackCandidatesDeviceCollection > trackCandidatesDC_
Definition: LSTEvent.h:57
PixelTripletsSoA::ConstView PixelTripletsConst
TripletsOccupancySoALayout<> TripletsOccupancySoA
Definition: TripletsSoA.h:37
void addPixelSegmentToEvent(std::vector< unsigned int > const &hitIndices0, std::vector< unsigned int > const &hitIndices1, std::vector< unsigned int > const &hitIndices2, std::vector< unsigned int > const &hitIndices3, std::vector< float > const &dPhiChange, std::vector< float > const &ptIn, std::vector< float > const &ptErr, std::vector< float > const &px, std::vector< float > const &py, std::vector< float > const &pz, std::vector< float > const &eta, std::vector< float > const &etaErr, std::vector< float > const &phi, std::vector< int > const &charge, std::vector< unsigned int > const &seedIdx, std::vector< int > const &superbin, std::vector< PixelType > const &pixelType, std::vector< char > const &isQuad)
constexpr unsigned int n_max_pixel_segments_per_module
Definition: Common.h:31
SegmentsOccupancySoALayout<> SegmentsOccupancySoA
Definition: SegmentsSoA.h:45
std::array< unsigned int, 5 > n_triplets_by_layer_endcap_
Definition: LSTEvent.h:45
std::array< unsigned int, 6 > n_segments_by_layer_barrel_
Definition: LSTEvent.h:42
ALPAKA_ACCELERATOR_NAMESPACE::Device Device
Definition: LSTEvent.dev.cc:13
typename traits::PortableMultiCollectionTrait< TDev, T0, Args... >::CollectionType PortableMultiCollection
std::optional< QuintupletsDeviceCollection > quintupletsDC_
Definition: LSTEvent.h:56
std::optional< ModulesHostCollection > modulesHC_
Definition: LSTEvent.h:68
std::array< unsigned int, 5 > n_quintuplets_by_layer_endcap_
Definition: LSTEvent.h:47
std::array< unsigned int, 5 > n_segments_by_layer_endcap_
Definition: LSTEvent.h:43
void pixelLineSegmentCleaning(bool no_pls_dupclean)
constexpr unsigned int n_max_pixel_triplets
Definition: Common.h:35
std::enable_if_t< alpaka::isDevice< TDev > and not std::is_array_v< T >, device_view< TDev, T > > make_device_view(TDev const &device, T &data)
Definition: memory.h:260
PixelType
Definition: Common.h:18
Vec3D constexpr elementsPerThread(Vec3D::all(static_cast< Idx >(1)))
ObjectRangesConst getRanges(bool sync=true)
unsigned int getNumberOfQuintupletsByLayer(unsigned int layer)
unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer)
void addHitToEvent(std::vector< float > const &x, std::vector< float > const &y, std::vector< float > const &z, std::vector< unsigned int > const &detId, std::vector< unsigned int > const &idxInNtuple)
Definition: LSTEvent.dev.cc:75
SegmentsOccupancySoA::View SegmentsOccupancy
Definition: SegmentsSoA.h:50
ALPAKA_ACCELERATOR_NAMESPACE::Queue Queue
Definition: LSTEvent.dev.cc:14
std::optional< PixelTripletsDeviceCollection > pixelTripletsDC_
Definition: LSTEvent.h:58
TSoA::ConstView getModules(bool sync=true)
WorkDiv< Dim1D > WorkDiv1D
Definition: config.h:32
SegmentsSoALayout<> SegmentsSoA
Definition: SegmentsSoA.h:44
unsigned int getNumberOfMiniDoubletsByLayer(unsigned int layer)
unsigned int getNumberOfTripletsByLayer(unsigned int layer)
TripletsSoA::ConstView TripletsConst
Definition: TripletsSoA.h:31
ModulesPixelSoALayout<> ModulesPixelSoA
Definition: ModulesSoA.h:44
QuintupletsSoALayout<> QuintupletsSoA
TrackCandidatesSoA::ConstView TrackCandidatesConst
TSoA::ConstView getMiniDoublets(bool sync=true)
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi(TAcc const &acc, float x, float y)
Definition: Hit.h:29
ModulesPixelSoA::ConstView ModulesPixelConst
Definition: ModulesSoA.h:49
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
SegmentsPixelSoA::View SegmentsPixel
Definition: SegmentsSoA.h:52
ALPAKA_ACCELERATOR_NAMESPACE::Acc3D Acc3D
Definition: LSTEvent.dev.cc:16
MiniDoubletsOccupancySoALayout<> MiniDoubletsOccupancySoA
TSoA::ConstView getSegments(bool sync=true)
HitsRangesSoALayout<> HitsRangesSoA
Definition: HitsSoA.h:30
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:20
unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer)
unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer)
MiniDoubletsSoA::ConstView MiniDoubletsConst
HitsSoA::ConstView HitsConst
Definition: HitsSoA.h:33
EndcapGeometryDevDeviceCollection const & endcapGeometry_
Definition: LSTEvent.h:79
ModulesSoA::ConstView ModulesConst
Definition: ModulesSoA.h:47
HitsRangesSoA::ConstView HitsRangesConst
Definition: HitsSoA.h:35
const std::vector< int > & module_layers()
Definition: LSTEff.cc:3388
HitsSoALayout<> HitsSoA
Definition: HitsSoA.h:29
MiniDoubletsSoALayout<> MiniDoubletsSoA
std::array< unsigned int, 6 > n_quintuplets_by_layer_barrel_
Definition: LSTEvent.h:46
SegmentsPixelSoALayout<> SegmentsPixelSoA
Definition: SegmentsSoA.h:46
std::optional< ObjectRangesHostCollection > rangesHC_
Definition: LSTEvent.h:62
SegmentsPixelSoA::ConstView SegmentsPixelConst
Definition: SegmentsSoA.h:53
std::optional< SegmentsHostCollection > segmentsHC_
Definition: LSTEvent.h:65
constexpr unsigned int n_max_pixel_track_candidates
Definition: Common.h:38
const std::vector< int > & module_subdets()
Definition: LSTEff.cc:3288
PixelTripletsConst getPixelTriplets(bool sync=true)
TSoA::ConstView getHits(bool inCMSSW=false, bool sync=true)
PixelQuintupletsSoA::ConstView PixelQuintupletsConst
std::optional< ObjectRangesDeviceCollection > rangesDC_
Definition: LSTEvent.h:51
Vec< Dim3D > Vec3D
Definition: config.h:27
std::optional< TripletsDeviceCollection > tripletsDC_
Definition: LSTEvent.h:55
std::optional< HitsDeviceCollection > hitsDC_
Definition: LSTEvent.h:52
SegmentsOccupancySoA::ConstView SegmentsOccupancyConst
Definition: SegmentsSoA.h:51
QuintupletsSoA::ConstView QuintupletsConst
void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets)
std::array< unsigned int, 6 > n_minidoublets_by_layer_barrel_
Definition: LSTEvent.h:40
unsigned int getNumberOfSegmentsByLayer(unsigned int layer)
TripletsOccupancySoA::ConstView TripletsOccupancyConst
Definition: TripletsSoA.h:39
ObjectRangesSoA::ConstView ObjectRangesConst
std::optional< PixelQuintupletsHostCollection > pixelQuintupletsHC_
Definition: LSTEvent.h:71
WorkDiv< Dim3D > WorkDiv3D
Definition: config.h:34
std::optional< QuintupletsHostCollection > quintupletsHC_
Definition: LSTEvent.h:69
SegmentsSoA::ConstView SegmentsConst
Definition: SegmentsSoA.h:49
float x
std::optional< SegmentsDeviceCollection > segmentsDC_
Definition: LSTEvent.h:54
QuintupletsOccupancySoA::ConstView QuintupletsOccupancyConst
std::optional< MiniDoubletsHostCollection > miniDoubletsHC_
Definition: LSTEvent.h:64
constexpr unsigned int n_max_pixel_quintuplets
Definition: Common.h:36
std::array< unsigned int, 5 > n_minidoublets_by_layer_endcap_
Definition: LSTEvent.h:41
std::optional< HitsHostCollection > hitsHC_
Definition: LSTEvent.h:63
TupleMultiplicity< TrackerTraits > const *__restrict__ uint32_t nHits
TripletsSoALayout<> TripletsSoA
Definition: TripletsSoA.h:29
std::enable_if_t< not std::is_array_v< T >, host_view< T > > make_host_view(T &data)
Definition: memory.h:153
std::optional< TrackCandidatesHostCollection > trackCandidatesHC_
Definition: LSTEvent.h:67
unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer)
MiniDoubletsOccupancySoA::ConstView MiniDoubletsOccupancyConst
ALPAKA_ACCELERATOR_NAMESPACE::Acc1D Acc1D
Definition: LSTEvent.dev.cc:15
QuintupletsOccupancySoALayout<> QuintupletsOccupancySoA
constexpr unsigned int size_superbins
Definition: Common.h:41
unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer)
ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv< typename Vec::Dim > createWorkDiv(const Vec &blocksPerGrid, const Vec &threadsPerBlock, const Vec &elementsPerThreadArg)
Definition: Common.h:16
std::optional< MiniDoubletsDeviceCollection > miniDoubletsDC_
Definition: LSTEvent.h:53
unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer)
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const &acc, float x, float y, float z)
Definition: Hit.h:11