CMS 3D CMS Logo

HcalDigisProducerGPU.cc
Go to the documentation of this file.
1 #include <iostream>
2 
14 
15 class HcalDigisProducerGPU : public edm::stream::EDProducer<edm::ExternalWork> {
16 public:
17  explicit HcalDigisProducerGPU(edm::ParameterSet const& ps);
18  ~HcalDigisProducerGPU() override = default;
20 
21 private:
23  void produce(edm::Event&, edm::EventSetup const&) override;
24 
25 private:
26  // input product tokens
29 
30  // type aliases
31  using HostCollectionf01 =
34  using HostCollectionf5 =
37  using HostCollectionf3 =
40 
41  // output product tokens
48 
50 
53  };
55 
56  // per event host buffers
60 
61  // device products: product owns memory (i.e. not the module)
65 };
66 
69 
70  // FIXME
71  desc.add<edm::InputTag>("hbheDigisLabel", edm::InputTag("hcalDigis"));
72  desc.add<edm::InputTag>("qie11DigiLabel", edm::InputTag("hcalDigis"));
73  desc.add<std::string>("digisLabelF01HE", std::string{"f01HEDigisGPU"});
74  desc.add<std::string>("digisLabelF5HB", std::string{"f5HBDigisGPU"});
75  desc.add<std::string>("digisLabelF3HB", std::string{"f3HBDigisGPU"});
76  desc.add<uint32_t>("maxChannelsF01HE", 10000u);
77  desc.add<uint32_t>("maxChannelsF5HB", 10000u);
78  desc.add<uint32_t>("maxChannelsF3HB", 10000u);
79 
80  confDesc.addWithDefaultLabel(desc);
81 }
82 
84  : hbheDigiToken_{consumes<HBHEDigiCollection>(ps.getParameter<edm::InputTag>("hbheDigisLabel"))},
85  qie11DigiToken_{consumes<QIE11DigiCollection>(ps.getParameter<edm::InputTag>("qie11DigiLabel"))},
86  digisF01HEToken_{produces<ProductTypef01>(ps.getParameter<std::string>("digisLabelF01HE"))},
87  digisF5HBToken_{produces<ProductTypef5>(ps.getParameter<std::string>("digisLabelF5HB"))},
88  digisF3HBToken_{produces<ProductTypef3>(ps.getParameter<std::string>("digisLabelF3HB"))} {
89  config_.maxChannelsF01HE = ps.getParameter<uint32_t>("maxChannelsF01HE");
90  config_.maxChannelsF5HB = ps.getParameter<uint32_t>("maxChannelsF5HB");
91  config_.maxChannelsF3HB = ps.getParameter<uint32_t>("maxChannelsF3HB");
92 
93  // this is a preallocation for the max statically known number of time samples
94  // actual stride/nsamples will be inferred from data
95  hf01_.stride = hcal::compute_stride<hcal::Flavor1>(QIE11DigiCollection::MAXSAMPLES);
96  hf5_.stride = hcal::compute_stride<hcal::Flavor5>(HBHEDataFrame::MAXSAMPLES);
97  hf3_.stride = hcal::compute_stride<hcal::Flavor3>(QIE11DigiCollection::MAXSAMPLES);
98 
99  // preallocate pinned host memory only if CUDA is available
101  if (cuda and cuda->enabled()) {
102  hf01_.reserve(config_.maxChannelsF01HE);
103  hf5_.reserve(config_.maxChannelsF5HB);
104  hf3_.reserve(config_.maxChannelsF3HB);
105  }
106 }
107 
109  edm::EventSetup const& setup,
111  // raii
112  cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_};
113 
114  // clear host buffers
115  hf01_.clear();
116  hf5_.clear();
117  hf3_.clear();
118 
119  // event data
122  event.getByToken(hbheDigiToken_, hbheDigis);
123  event.getByToken(qie11DigiToken_, qie11Digis);
124 
125  // init f5 collection
126  if (not hbheDigis->empty()) {
127  auto const nsamples = (*hbheDigis)[0].size();
128  auto const stride = hcal::compute_stride<hcal::Flavor5>(nsamples);
129  hf5_.stride = stride;
130 
131  // flavor5 get device blobs
132  df5_.stride = stride;
133  df5_.data = cms::cuda::make_device_unique<uint16_t[]>(config_.maxChannelsF5HB * stride, ctx.stream());
134  df5_.ids = cms::cuda::make_device_unique<uint32_t[]>(config_.maxChannelsF5HB, ctx.stream());
135  df5_.npresamples = cms::cuda::make_device_unique<uint8_t[]>(config_.maxChannelsF5HB, ctx.stream());
136  }
137 
138  if (not qie11Digis->empty()) {
139  auto const nsamples = qie11Digis->samples();
140  auto const stride01 = hcal::compute_stride<hcal::Flavor1>(nsamples);
141  auto const stride3 = hcal::compute_stride<hcal::Flavor3>(nsamples);
142 
143  hf01_.stride = stride01;
144  hf3_.stride = stride3;
145 
146  // flavor 0/1 get devie blobs
147  df01_.stride = stride01;
148  df01_.data = cms::cuda::make_device_unique<uint16_t[]>(config_.maxChannelsF01HE * stride01, ctx.stream());
149  df01_.ids = cms::cuda::make_device_unique<uint32_t[]>(config_.maxChannelsF01HE, ctx.stream());
150 
151  // flavor3 get device blobs
152  df3_.stride = stride3;
153  df3_.data = cms::cuda::make_device_unique<uint16_t[]>(config_.maxChannelsF3HB * stride3, ctx.stream());
154  df3_.ids = cms::cuda::make_device_unique<uint32_t[]>(config_.maxChannelsF3HB, ctx.stream());
155  }
156 
157  for (auto const& hbhe : *hbheDigis) {
158  auto const id = hbhe.id().rawId();
159  auto const presamples = hbhe.presamples();
160  hf5_.ids.push_back(id);
161  hf5_.npresamples.push_back(presamples);
162  auto const stride = hcal::compute_stride<hcal::Flavor5>(hbhe.size());
163  assert(stride == hf5_.stride && "strides must be the same for every single digi of the collection");
164  // simple for now...
165  static_assert(hcal::Flavor5::HEADER_WORDS == 1);
166  uint16_t header_word = (1 << 15) | (0x5 << 12) | (0 << 10) | ((hbhe.sample(0).capid() & 0x3) << 8);
167  hf5_.data.push_back(header_word);
168  for (unsigned int i = 0; i < stride - hcal::Flavor5::HEADER_WORDS; i++) {
169  uint16_t s0 = (0 << 7) | (static_cast<uint8_t>(hbhe.sample(2 * i).adc()) & 0x7f);
170  uint16_t s1 = (0 << 7) | (static_cast<uint8_t>(hbhe.sample(2 * i + 1).adc()) & 0x7f);
171  uint16_t sample = (s1 << 8) | s0;
172  hf5_.data.push_back(sample);
173  }
174  }
175 
176  for (unsigned int i = 0; i < qie11Digis->size(); i++) {
177  auto const& digi = QIE11DataFrame{(*qie11Digis)[i]};
178  assert(digi.samples() == qie11Digis->samples() && "collection nsamples must equal per digi samples");
179  if (digi.flavor() == 0 or digi.flavor() == 1) {
180  if (digi.detid().subdetId() != HcalEndcap)
181  continue;
182  auto const id = digi.detid().rawId();
183  hf01_.ids.push_back(id);
184  for (int hw = 0; hw < hcal::Flavor1::HEADER_WORDS; hw++)
185  hf01_.data.push_back((*qie11Digis)[i][hw]);
186  for (int sample = 0; sample < digi.samples(); sample++) {
187  hf01_.data.push_back((*qie11Digis)[i][hcal::Flavor1::HEADER_WORDS + sample]);
188  }
189  } else if (digi.flavor() == 3) {
190  if (digi.detid().subdetId() != HcalBarrel)
191  continue;
192  auto const id = digi.detid().rawId();
193  hf3_.ids.push_back(id);
194  for (int hw = 0; hw < hcal::Flavor3::HEADER_WORDS; hw++)
195  hf3_.data.push_back((*qie11Digis)[i][hw]);
196  for (int sample = 0; sample < digi.samples(); sample++) {
197  hf3_.data.push_back((*qie11Digis)[i][hcal::Flavor3::HEADER_WORDS + sample]);
198  }
199  }
200  }
201 
202  auto lambdaToTransfer = [&ctx](auto* dest, auto const& src) {
203  if (src.empty())
204  return;
205  using vector_type = typename std::remove_reference<decltype(src)>::type;
206  using type = typename vector_type::value_type;
207  using dest_data_type = typename std::remove_pointer<decltype(dest)>::type;
208  static_assert(std::is_same<dest_data_type, type>::value && "Dest and Src data typesdo not match");
209  cudaCheck(cudaMemcpyAsync(dest, src.data(), src.size() * sizeof(type), cudaMemcpyHostToDevice, ctx.stream()));
210  };
211 
212  lambdaToTransfer(df01_.data.get(), hf01_.data);
213  lambdaToTransfer(df01_.ids.get(), hf01_.ids);
214 
215  lambdaToTransfer(df5_.data.get(), hf5_.data);
216  lambdaToTransfer(df5_.ids.get(), hf5_.ids);
217  lambdaToTransfer(df5_.npresamples.get(), hf5_.npresamples);
218 
219  lambdaToTransfer(df3_.data.get(), hf3_.data);
220  lambdaToTransfer(df3_.ids.get(), hf3_.ids);
221 
222  df01_.size = hf01_.ids.size();
223  df5_.size = hf5_.ids.size();
224  df3_.size = hf3_.ids.size();
225 }
226 
229 
230  ctx.emplace(event, digisF01HEToken_, std::move(df01_));
231  ctx.emplace(event, digisF5HBToken_, std::move(df5_));
232  ctx.emplace(event, digisF3HBToken_, std::move(df3_));
233 }
234 
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
T getParameter(std::string const &) const
Definition: ParameterSet.h:307
static const int MAXSAMPLES
Definition: HBHEDataFrame.h:95
void acquire(edm::Event const &, edm::EventSetup const &, edm::WaitingTaskWithArenaHolder) override
static constexpr int HEADER_WORDS
edm::EDPutTokenT< ProductTypef01 > digisF01HEToken_
static void fillDescriptions(edm::ConfigurationDescriptions &)
StoragePolicy::template StorageSelector< uint16_t >::type data
static constexpr int HEADER_WORDS
assert(be >=bs)
constexpr uint32_t stride
Definition: HelixFit.h:22
edm::EDPutTokenT< ProductTypef3 > digisF3HBToken_
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
DeviceCollectionf3 df3_
edm::EDPutTokenT< ProductTypef5 > digisF5HBToken_
void produce(edm::Event &, edm::EventSetup const &) override
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
static const size_type MAXSAMPLES
StoragePolicy::template StorageSelector< uint32_t >::type ids
edm::EDGetTokenT< HBHEDigiCollection > hbheDigiToken_
DeviceCollectionf5 df5_
~HcalDigisProducerGPU() override=default
cms::cuda::ContextState cudaState_
static constexpr int HEADER_WORDS
HcalDigisProducerGPU(edm::ParameterSet const &ps)
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
std::enable_if< std::is_same< T, ::calo::common::tags::Vec >::value, void >::type clear()
edm::EDGetTokenT< QIE11DigiCollection > qie11DigiToken_
DeviceCollectionf01 df01_
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1