CMS 3D CMS Logo

TestCUDAProducerGPUEWTask.cc
Go to the documentation of this file.
11 
19 
21 
22 #include <thread>
23 
// Test EDProducer built on edm::stream::EDProducer with the
// edm::ExternalWork extension, i.e. it overrides both acquire() and
// produce(). According to the module's own configuration comment it
// models a GPU algorithm that alternates device-to-host transfers with
// kernel executions, synchronizing after each transfer.
24 class TestCUDAProducerGPUEWTask : public edm::stream::EDProducer<edm::ExternalWork> {
25 public:
26  explicit TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig);
27  ~TestCUDAProducerGPUEWTask() override = default;
28 
// Registers the "src" InputTag parameter and the module description.
29  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
30 
// Runs the GPU algorithm on the input product, queues an asynchronous
// copy of one float back to the host buffer, and pushes addSimpleWork()
// as the next task.
31  void acquire(edm::Event const& iEvent,
32  edm::EventSetup const& iSetup,
33  edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
// Throws cms::Exception("Assert") unless the value copied to the host is
// 13, then moves the device buffer into the event.
34  void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;
35 
36 private:
38 
46 };
47 
49  : label_{iConfig.getParameter<std::string>("@module_label")},
50  srcToken_{consumes<cms::cuda::Product<cms::cudatest::Thing>>(iConfig.getParameter<edm::InputTag>("src"))},
51  dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
53  if (cs->enabled()) {
54  hostData_ = cms::cuda::make_host_noncached_unique<float>();
55  }
56 }
57 
60  desc.add<edm::InputTag>("src", edm::InputTag());
61  descriptions.addWithDefaultLabel(desc);
62  descriptions.setComment(
63  "This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
64  "algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
65  "alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
66  "GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
67  "with the ExternalWork extension and explicit TBB tasks within the module. Produces "
68  "cms::cuda::Product<cms::cudatest::Thing>.");
69 }
70 
72  edm::EventSetup const& iSetup,
73  edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
74  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire begin event "
75  << iEvent.id().event() << " stream " << iEvent.streamID();
76 
77  auto const& in = iEvent.get(srcToken_);
78  cms::cuda::ScopedContextAcquire ctx{in, waitingTaskHolder, ctxState_};
79 
80  cms::cudatest::Thing const& input = ctx.get(in);
81 
82  devicePtr_ = gpuAlgo_.runAlgo(label_, input.get(), ctx.stream());
83 // Mimic the need to transfer some of the GPU data back to CPU to
84  // be used for something within this module, or to be put in the
85  // event.
86  cudaCheck(
87  cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
88 // Push a task so that addSimpleWork(), rather than produce(), runs
89 // next once the asynchronous work (and acquire()) has finished
90  ctx.pushNextTask([iev = iEvent.id().event(), istr = iEvent.streamID(), this](cms::cuda::ScopedContextTask ctx) {
91  addSimpleWork(iev, istr, ctx);
92  });
93 
94  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire end event "
95  << iEvent.id().event() << " stream " << iEvent.streamID();
96 }
97 
99  edm::StreamID streamID,
101  if (*hostData_ < 13) {
102  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
103  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID << " stream " << streamID
104  << " 10th element " << *hostData_ << " not satisfied, queueing more work";
105  cudaCheck(
106  cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
107 
108  ctx.pushNextTask(
109  [eventID, streamID, this](cms::cuda::ScopedContextTask ctx) { addSimpleWork(eventID, streamID, ctx); });
110  gpuAlgo_.runSimpleAlgo(devicePtr_.get(), ctx.stream());
111  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
112  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID << " stream " << streamID;
113  } else {
114  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
115  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID << " stream " << streamID
116  << " 10th element " << *hostData_ << " not queueing more work";
117  }
118 }
119 
121  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
122  << label_ << " TestCUDAProducerGPUEWTask::produce begin event " << iEvent.id().event() << " stream "
123  << iEvent.streamID() << " 10th element " << *hostData_;
124  if (*hostData_ != 13) {
125  throw cms::Exception("Assert") << "Expecting 10th element to be 13, got " << *hostData_;
126  }
127 
129 
130  ctx.emplace(iEvent, dstToken_, std::move(devicePtr_));
131 
132  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::produce end event "
133  << iEvent.id().event() << " stream " << iEvent.streamID();
134 }
135 
Log< level::Info, true > LogVerbatim
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
T getParameter(std::string const &) const
Definition: ParameterSet.h:303
void runSimpleAlgo(float *d_data, cudaStream_t stream) const
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
unsigned long long EventNumber_t
~TestCUDAProducerGPUEWTask() override=default
TestCUDAProducerGPUKernel gpuAlgo_
std::unique_ptr< T, impl::HostDeleter > unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
static std::string const input
Definition: EdmProvDump.cc:47
int iEvent
Definition: GenABIO.cc:224
cms::cuda::host::noncached::unique_ptr< float > hostData_
cms::cuda::device::unique_ptr< float[]> devicePtr_
TestCUDAProducerGPUEWTask(edm::ParameterSet const &iConfig)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void acquire(edm::Event const &iEvent, edm::EventSetup const &iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override
edm::EDPutTokenT< cms::cuda::Product< cms::cudatest::Thing > > const dstToken_
void setComment(std::string const &value)
cms::cuda::ContextState ctxState_
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int32_t int iev
edm::EDGetTokenT< cms::cuda::Product< cms::cudatest::Thing > > const srcToken_
cms::cuda::device::unique_ptr< float[]> runAlgo(const std::string &label, cudaStream_t stream) const
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
def move(src, dest)
Definition: eostools.py:511
void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask &ctx)
void produce(edm::Event &iEvent, const edm::EventSetup &iSetup) override