
TrackingRecHit2DHeterogeneous.h
#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h

#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h"
#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"

template <typename Traits>
class TrackingRecHit2DHeterogeneous {
public:
  enum class Storage32 {
    kXLocal = 0,
    kYLocal = 1,
    kXerror = 2,
    kYerror = 3,
    kCharge = 4,
    kXGlobal = 5,
    kYGlobal = 6,
    kZGlobal = 7,
    kRGlobal = 8,
    kPhiStorage = 9,
    kLayers = 10
  };

  enum class Storage16 {
    kDetId = 0,
    kPhi = 1,
    kXSize = 2,
    kYSize = 3,
  };

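  // Layout note (added for clarity): m_store32 is a single allocation holding n32
  // column blocks of nHits floats each, indexed by Storage32; m_store16 likewise
  // holds n16 blocks of uint16_t indexed by Storage16. A column pointer is computed
  // with arithmetic of the form
  //
  //   float* xl = m_store32.get() + static_cast<int>(Storage32::kXLocal) * nHits;
  //
  // which is exactly what the get32()/get16() lambdas in the constructors below do.
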
  template <typename T>
  using unique_ptr = typename Traits::template unique_ptr<T>;

  using PhiBinner = TrackingRecHit2DSOAView::PhiBinner;

  TrackingRecHit2DHeterogeneous() = default;

  explicit TrackingRecHit2DHeterogeneous(
      uint32_t nHits,
      bool isPhase2,
      int32_t offsetBPIX2,
      pixelCPEforGPU::ParamsOnGPU const* cpeParams,
      uint32_t const* hitsModuleStart,
      cudaStream_t stream,
      TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const* input = nullptr);

  explicit TrackingRecHit2DHeterogeneous(
      float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream = nullptr);
  ~TrackingRecHit2DHeterogeneous() = default;

  TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete;
  TrackingRecHit2DHeterogeneous& operator=(const TrackingRecHit2DHeterogeneous&) = delete;
  TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default;
  TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default;

  TrackingRecHit2DSOAView* view() { return m_view.get(); }
  TrackingRecHit2DSOAView const* view() const { return m_view.get(); }

  auto nHits() const { return m_nHits; }
  auto nMaxModules() const { return m_nMaxModules; }
  auto offsetBPIX2() const { return m_offsetBPIX2; }

  auto hitsModuleStart() const { return m_hitsModuleStart; }
  auto hitsLayerStart() { return m_hitsLayerStart; }
  auto phiBinner() { return m_phiBinner; }
  auto phiBinnerStorage() { return m_phiBinnerStorage; }
  auto iphi() { return m_iphi; }

  cms::cuda::host::unique_ptr<float[]> localCoordToHostAsync(cudaStream_t stream) const;

  cms::cuda::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;

  cms::cuda::host::unique_ptr<float[]> store32ToHostAsync(cudaStream_t stream) const;
  cms::cuda::host::unique_ptr<uint16_t[]> store16ToHostAsync(cudaStream_t stream) const;

  // needs specialization for Host
  void copyFromGPU(TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const* input, cudaStream_t stream);

private:
  static constexpr uint32_t n16 = 4;                 // number of elements in m_store16
  static constexpr uint32_t n32 = 10;                // number of elements in m_store32
  static_assert(sizeof(uint32_t) == sizeof(float));  // just stating the obvious
  static_assert(n32 == static_cast<uint32_t>(Storage32::kLayers));

  unique_ptr<uint16_t[]> m_store16;
  unique_ptr<float[]> m_store32;

  unique_ptr<TrackingRecHit2DSOAView::PhiBinner> m_PhiBinnerStore;
  unique_ptr<TrackingRecHit2DSOAView::AverageGeometry> m_AverageGeometryStore;

  unique_ptr<TrackingRecHit2DSOAView> m_view;

  uint32_t m_nHits;
  int32_t m_offsetBPIX2;

  uint32_t const* m_hitsModuleStart;  // needed for legacy, this is on GPU!

  uint32_t m_nMaxModules;
  // needed as kernel params...
  PhiBinner* m_phiBinner;
  PhiBinner::index_type* m_phiBinnerStorage;
  uint32_t* m_hitsLayerStart;
  int16_t* m_iphi;
};
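
// Minimal usage sketch (illustrative, not part of the original header). The cpeParams,
// hitsModuleStart and stream values are placeholders for what a producer would pass in:
//
//   TrackingRecHit2DGPU hits(nHits, isPhase2, offsetBPIX2, cpeParams,
//                            hitsModuleStart, stream);
//   TrackingRecHit2DSOAView* soaView = hits.view();  // device-resident for GPUTraits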

using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::CPUTraits>;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cms::cudacompat::HostTraits>;

#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

template <typename Traits>
TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(
    uint32_t nHits,
    bool isPhase2,
    int32_t offsetBPIX2,
    pixelCPEforGPU::ParamsOnGPU const* cpeParams,
    uint32_t const* hitsModuleStart,
    cudaStream_t stream,
    TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const* input)
    : m_nHits(nHits), m_offsetBPIX2(offsetBPIX2), m_hitsModuleStart(hitsModuleStart) {
  auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);

  m_nMaxModules = isPhase2 ? phase2PixelTopology::numberOfModules : phase1PixelTopology::numberOfModules;

  view->m_nHits = nHits;
  view->m_nMaxModules = m_nMaxModules;
  m_view = Traits::template make_unique<TrackingRecHit2DSOAView>(stream);  // leave it on host and pass it by value?
  m_AverageGeometryStore = Traits::template make_unique<TrackingRecHit2DSOAView::AverageGeometry>(stream);
  view->m_averageGeometry = m_AverageGeometryStore.get();
  view->m_cpeParams = cpeParams;
  view->m_hitsModuleStart = hitsModuleStart;

  // if empty do not bother
  if (0 == nHits) {
    if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
      cms::cuda::copyAsync(m_view, view, stream);
    } else {
      m_view.reset(view.release());  // NOLINT: std::move() breaks CUDA version
    }
    return;
  }

  // the single arrays are not 128 bit aligned...
  // the hits are actually accessed in order only in building
  // if ordering is relevant they may have to be stored phi-ordered by layer or so
  // this will break 1to1 correspondence with cluster and module locality
  // so unless proven VERY inefficient we keep it ordered as generated

  // host copy is "reduced" (to be reviewed at some point)
  if constexpr (std::is_same_v<Traits, cms::cudacompat::HostTraits>) {
    // it has to compile for ALL cases
    copyFromGPU(input, stream);
  } else {
    assert(input == nullptr);

    auto nL = isPhase2 ? phase2PixelTopology::numberOfLayers : phase1PixelTopology::numberOfLayers;

    m_store16 = Traits::template make_unique<uint16_t[]>(nHits * n16, stream);
    m_store32 = Traits::template make_unique<float[]>(nHits * n32 + nL + 1, stream);
    m_PhiBinnerStore = Traits::template make_unique<TrackingRecHit2DSOAView::PhiBinner>(stream);
  }

  static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(float));
  static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(TrackingRecHit2DSOAView::PhiBinner::index_type));

  auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast<int>(i) * nHits; };

  // copy all the pointers
  m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
  m_phiBinnerStorage = view->m_phiBinnerStorage =
      reinterpret_cast<TrackingRecHit2DSOAView::PhiBinner::index_type*>(get32(Storage32::kPhiStorage));

  view->m_xl = get32(Storage32::kXLocal);
  view->m_yl = get32(Storage32::kYLocal);
  view->m_xerr = get32(Storage32::kXerror);
  view->m_yerr = get32(Storage32::kYerror);
  view->m_chargeAndStatus = reinterpret_cast<uint32_t*>(get32(Storage32::kCharge));

  if constexpr (!std::is_same_v<Traits, cms::cudacompat::HostTraits>) {
    assert(input == nullptr);
    view->m_xg = get32(Storage32::kXGlobal);
    view->m_yg = get32(Storage32::kYGlobal);
    view->m_zg = get32(Storage32::kZGlobal);
    view->m_rg = get32(Storage32::kRGlobal);

    auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast<int>(i) * nHits; };
    m_iphi = view->m_iphi = reinterpret_cast<int16_t*>(get16(Storage16::kPhi));

    view->m_xsize = reinterpret_cast<int16_t*>(get16(Storage16::kXSize));
    view->m_ysize = reinterpret_cast<int16_t*>(get16(Storage16::kYSize));
    view->m_detInd = get16(Storage16::kDetId);

    m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
    m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast<uint32_t*>(get32(Storage32::kLayers));
  }

  // transfer view
  if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
    cms::cuda::copyAsync(m_view, view, stream);
  } else {
    m_view.reset(view.release());  // NOLINT: std::move() breaks CUDA version
  }
}
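
// Note (added for clarity): for GPUTraits the SoA view lives on the device, so the
// host-side staging object is shipped with cms::cuda::copyAsync (a thin wrapper around
// cudaMemcpyAsync on the given stream); for the CPU/Host traits the host pointer is
// simply adopted and no copy is made.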

//this is intended to be used only for CPU SoA but doesn't hurt to have it for all cases
template <typename Traits>
TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(
    float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream)
    : m_nHits(nHits), m_hitsModuleStart(modules) {
  auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);

  m_view = Traits::template make_unique<TrackingRecHit2DSOAView>(stream);

  view->m_nHits = nHits;

  if (0 == nHits) {
    if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
      cms::cuda::copyAsync(m_view, view, stream);
    } else {
      m_view = std::move(view);
    }
    return;
  }

  m_store16 = Traits::template make_unique<uint16_t[]>(nHits * n16, stream);
  m_store32 = Traits::template make_unique<float[]>(nHits * n32, stream);
  m_PhiBinnerStore = Traits::template make_unique<TrackingRecHit2DSOAView::PhiBinner>(stream);
  m_AverageGeometryStore = Traits::template make_unique<TrackingRecHit2DSOAView::AverageGeometry>(stream);

  view->m_averageGeometry = m_AverageGeometryStore.get();
  view->m_hitsModuleStart = m_hitsModuleStart;

  //store transfer
  if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
    cms::cuda::copyAsync(m_store16, store16, static_cast<int>(n16 * nHits), stream);
    cms::cuda::copyAsync(m_store32, store32, static_cast<int>(n32 * nHits), stream);
  } else {
    std::copy(store32, store32 + nHits * n32, m_store32.get());  // want to copy it
    std::copy(store16, store16 + nHits * n16, m_store16.get());
  }

  //getters
  auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast<int>(i) * nHits; };
  auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast<int>(i) * nHits; };

  //Store 32
  view->m_xl = get32(Storage32::kXLocal);
  view->m_yl = get32(Storage32::kYLocal);
  view->m_xerr = get32(Storage32::kXerror);
  view->m_yerr = get32(Storage32::kYerror);
  view->m_chargeAndStatus = reinterpret_cast<uint32_t*>(get32(Storage32::kCharge));
  view->m_xg = get32(Storage32::kXGlobal);
  view->m_yg = get32(Storage32::kYGlobal);
  view->m_zg = get32(Storage32::kZGlobal);
  view->m_rg = get32(Storage32::kRGlobal);

  m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
  m_phiBinnerStorage = view->m_phiBinnerStorage =
      reinterpret_cast<TrackingRecHit2DSOAView::PhiBinner::index_type*>(get32(Storage32::kPhiStorage));

  //Store 16
  view->m_detInd = get16(Storage16::kDetId);
  m_iphi = view->m_iphi = reinterpret_cast<int16_t*>(get16(Storage16::kPhi));
  view->m_xsize = reinterpret_cast<int16_t*>(get16(Storage16::kXSize));
  view->m_ysize = reinterpret_cast<int16_t*>(get16(Storage16::kYSize));

  // transfer view
  if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
    cms::cuda::copyAsync(m_view, view, stream);
  } else {
    m_view = std::move(view);
  }
}
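
// Readback sketch (illustrative, assuming the *ToHostAsync helpers are implemented in
// the corresponding .cc/.cu translation unit); h32 and the explicit synchronization are
// placeholders for the caller's own stream handling:
//
//   TrackingRecHit2DGPU hits(store32, store16, modules, nHits, stream);
//   auto h32 = hits.store32ToHostAsync(stream);
//   cudaCheck(cudaStreamSynchronize(stream));  // the copy is asynchronous on 'stream'
//   float xl0 = h32[static_cast<int>(TrackingRecHit2DGPU::Storage32::kXLocal) * hits.nHits()];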

#endif  // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h