CMS 3D CMS Logo

TestCUDAProducerGPUEWTask.cc
Go to the documentation of this file.
11 
19 
21 
22 #include <thread>
23 
// Test EDProducer built on edm::stream::EDProducer with the edm::ExternalWork
// ability; it therefore declares the acquire()/produce() pair below.
// It consumes and produces a cms::cuda::Product<cms::cudatest::Thing>
// (see the tokens initialised in the constructor).
//
// NOTE(review): listing lines 37 and 39-45 are missing from this extract. The
// cross-reference index places the data members label_, srcToken_, dstToken_,
// gpuAlgo_, ctxState_, devicePtr_ and hostData_ on lines 39-45; line 37 is
// presumably the addSimpleWork() declaration -- confirm against the real file.
24 class TestCUDAProducerGPUEWTask : public edm::stream::EDProducer<edm::ExternalWork> {
25 public:
  // Construct from the module configuration (reads "@module_label" and "src").
26  explicit TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig);
27  ~TestCUDAProducerGPUEWTask() override = default;
28 
  // Registers the module's configuration description ("src" parameter).
29  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
30 
  // Launches the GPU work and the first device-to-host copy, and chains
  // addSimpleWork() as the next task via the scoped context.
31  void acquire(edm::Event const& iEvent,
32  edm::EventSetup const& iSetup,
33  edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
  // Checks the value copied back to the host and puts devicePtr_ into the event.
34  void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;
35 
36 private:
38 
46 };
47 
49  : label_{iConfig.getParameter<std::string>("@module_label")},
50  srcToken_{consumes<cms::cuda::Product<cms::cudatest::Thing>>(iConfig.getParameter<edm::InputTag>("src"))},
51  dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
  // Constructor: label_ is taken from the "@module_label" parameter, srcToken_
  // registers consumption of the product named by the "src" InputTag, and
  // dstToken_ registers the produced cms::cuda::Product<cms::cudatest::Thing>.
  //
  // NOTE(review): the signature (listing line 48) and the declaration of `cs`
  // (listing line 52) are missing from this extract; `cs` is presumably an
  // edm::Service<CUDAService> -- confirm against the real file.
  //
  // The single-float host buffer is allocated only when cs->enabled() is true;
  // otherwise hostData_ is left as default-initialised.
53  if (cs->enabled()) {
54  hostData_ = cms::cuda::make_host_noncached_unique<float>();
55  }
56 }
57 
// fillDescriptions body: declares the "src" InputTag parameter (default: an
// empty edm::InputTag), registers the description under the default label and
// attaches a free-text comment describing the module.
//
// NOTE(review): the signature and the declaration of `desc` (listing lines
// 58-59) are missing from this extract; per the cross-reference index `desc`
// is presumably an edm::ParameterSetDescription -- confirm.
60  desc.add<edm::InputTag>("src", edm::InputTag());
61  descriptions.addWithDefaultLabel(desc);
62  descriptions.setComment(
63  "This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
64  "algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
65  "alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
66  "GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
67  "with the ExternalWork extension and explicit TBB tasks within the module. Produces "
68  "cms::cuda::Product<cms::cudatest::Thing>.");
69 }
70 
72  edm::EventSetup const& iSetup,
73  edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
  // acquire(): starts the GPU work for this event and chains addSimpleWork()
  // as the follow-up task.
  // NOTE(review): the first signature line (listing line 71, which declares
  // iEvent) is missing from this extract.
74  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire begin event "
75  << iEvent.id().event() << " stream " << iEvent.streamID();
76 
  // Fetch the input product and build the scoped context from it, the waiting
  // task holder and this module's ctxState_.
77  auto const& in = iEvent.get(srcToken_);
78  cms::cuda::ScopedContextAcquire ctx{in, waitingTaskHolder, ctxState_};
79 
  // Unwrap the cms::cudatest::Thing from the product through the context.
80  cms::cudatest::Thing const& input = ctx.get(in);
81 
  // Run the algorithm on the context's stream; the result is kept in
  // devicePtr_ so that addSimpleWork() and produce() can use it later.
82  devicePtr_ = gpuAlgo_.runAlgo(label_, input.get(), ctx.stream());
83  // Mimic the need to transfer some of the GPU data back to CPU to
84  // be used for something within this module, or to be put in the
85  // event.
  // Copies one float, the element at offset 10 of devicePtr_, into hostData_.
86  cudaCheck(
87  cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
88  // Push a task to run addSimpleWork() after the asynchronous work
89  // (and acquire()) has finished instead of produce()
  // The event number and stream ID are captured by value; `this` is captured
  // so the task can call the member function.
90  ctx.pushNextTask([iev = iEvent.id().event(), istr = iEvent.streamID(), this](cms::cuda::ScopedContextTask ctx) {
91  addSimpleWork(iev, istr, ctx);
92  });
93 
94  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire end event "
95  << iEvent.id().event() << " stream " << iEvent.streamID();
96 }
97 
99  edm::StreamID streamID,
  // addSimpleWork(): follow-up task that inspects the value last copied into
  // hostData_. While it is below 13 it queues another copy, re-pushes itself
  // as the next task and launches runSimpleAlgo(); otherwise it only logs and
  // queues nothing further.
  // NOTE(review): listing lines 98 and 100 (first and last signature lines,
  // declaring eventID and ctx) are missing from this extract.
101  if (*hostData_ < 13) {
102  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
103  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID << " stream " << streamID
104  << " 10th element " << *hostData_ << " not satisfied, queueing more work";
  // Re-read the element at offset 10 of devicePtr_ into hostData_.
  // NOTE(review): this copy is issued before runSimpleAlgo() below on the same
  // stream, so the next task presumably sees the value from before that
  // kernel ran -- confirm this ordering is intended.
105  cudaCheck(
106  cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
107 
  // Chain this same function again as the next task.
108  ctx.pushNextTask(
109  [eventID, streamID, this](cms::cuda::ScopedContextTask ctx) { addSimpleWork(eventID, streamID, ctx); });
110  gpuAlgo_.runSimpleAlgo(devicePtr_.get(), ctx.stream());
111  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
112  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID << " stream " << streamID;
113  } else {
  // Threshold reached: no further task is pushed from here.
114  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
115  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID << " stream " << streamID
116  << " 10th element " << *hostData_ << " not queueing more work";
117  }
118 }
119 
// produce() body: verifies the value copied back from the device and moves
// the device buffer into the event.
// NOTE(review): the signature (listing line 120) and the declaration of `ctx`
// (listing line 128) are missing from this extract; `ctx` is presumably a
// cms::cuda::ScopedContextProduce built from ctxState_ -- confirm.
121  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
122  << label_ << " TestCUDAProducerGPUEWTask::produce begin event " << iEvent.id().event() << " stream "
123  << iEvent.streamID() << " 10th element " << *hostData_;
  // Test self-check: throws cms::Exception("Assert") unless the value is
  // exactly 13.
124  if (*hostData_ != 13) {
125  throw cms::Exception("Assert") << "Expecting 10th element to be 13, got " << *hostData_;
126  }
127 
129 
  // Ownership of devicePtr_ is moved into the product put under dstToken_;
  // devicePtr_ is left moved-from afterwards.
130  ctx.emplace(iEvent, dstToken_, std::move(devicePtr_));
131 
132  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::produce end event "
133  << iEvent.id().event() << " stream " << iEvent.streamID();
134 }
135 
edm::StreamID
Definition: StreamID.h:30
edm::ParameterSetDescription::add
ParameterDescriptionBase * add(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:95
input
static const std::string input
Definition: EdmProvDump.cc:48
dqmMemoryStats.float
float
Definition: dqmMemoryStats.py:127
TestCUDAProducerGPUKernel::runAlgo
cms::cuda::device::unique_ptr< float[]> runAlgo(const std::string &label, cudaStream_t stream) const
Definition: TestCUDAProducerGPUKernel.h:27
TestCUDAProducerGPUEWTask::devicePtr_
cms::cuda::device::unique_ptr< float[]> devicePtr_
Definition: TestCUDAProducerGPUEWTask.cc:44
cms::cuda::ScopedContextProduce
Definition: ScopedContext.h:149
fwrapper::cs
unique_ptr< ClusterSequence > cs
Definition: fastjetfortran_madfks.cc:45
WaitingTaskHolder.h
TestCUDAProducerGPUEWTask::label_
const std::string label_
Definition: TestCUDAProducerGPUEWTask.cc:39
edm::EDGetTokenT
Definition: EDGetToken.h:33
cms::cuda::ScopedContextTask::pushNextTask
void pushNextTask(F &&f)
Definition: ScopedContext.h:203
edm::EDPutTokenT
Definition: EDPutToken.h:33
cms::cuda::impl::ScopedContextBase::stream
cudaStream_t stream() const
Definition: ScopedContext.h:34
cms::cuda::host::noncached::unique_ptr
std::unique_ptr< T, impl::HostDeleter > unique_ptr
Definition: host_noncached_unique_ptr.h:23
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
FunctorTask.h
EDProducer.h
TestCUDAProducerGPUEWTask::ctxState_
cms::cuda::ContextState ctxState_
Definition: TestCUDAProducerGPUEWTask.cc:43
TestCUDAProducerGPUKernel.h
host_noncached_unique_ptr.h
TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask
TestCUDAProducerGPUEWTask(edm::ParameterSet const &iConfig)
Definition: TestCUDAProducerGPUEWTask.cc:48
edm::WaitingTaskWithArenaHolder
Definition: WaitingTaskWithArenaHolder.h:31
MakerMacros.h
TestCUDAProducerGPUEWTask::acquire
void acquire(edm::Event const &iEvent, edm::EventSetup const &iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override
Definition: TestCUDAProducerGPUEWTask.cc:71
DEFINE_FWK_MODULE
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
TestCUDAProducerGPUEWTask::addSimpleWork
void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask &ctx)
Definition: TestCUDAProducerGPUEWTask.cc:98
Service.h
TestCUDAProducerGPUEWTask::hostData_
cms::cuda::host::noncached::unique_ptr< float > hostData_
Definition: TestCUDAProducerGPUEWTask.cc:45
edm::EventNumber_t
unsigned long long EventNumber_t
Definition: RunLumiEventNumber.h:12
ParameterSetDescription.h
TestCUDAProducerGPUEWTask::dstToken_
const edm::EDPutTokenT< cms::cuda::Product< cms::cudatest::Thing > > dstToken_
Definition: TestCUDAProducerGPUEWTask.cc:41
TestCUDAProducerGPUEWTask::srcToken_
const edm::EDGetTokenT< cms::cuda::Product< cms::cudatest::Thing > > srcToken_
Definition: TestCUDAProducerGPUEWTask.cc:40
edm::ConfigurationDescriptions
Definition: ConfigurationDescriptions.h:28
AlCaHLTBitMon_QueryRunRegistry.string
string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
TestCUDAProducerGPUKernel::runSimpleAlgo
void runSimpleAlgo(float *d_data, cudaStream_t stream) const
cms::cuda::ContextState
Definition: ContextState.h:15
TestCUDAProducerGPUEWTask
Definition: TestCUDAProducerGPUEWTask.cc:24
HLT_2018_cff.InputTag
InputTag
Definition: HLT_2018_cff.py:79016
TestCUDAProducerGPUEWTask::fillDescriptions
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition: TestCUDAProducerGPUEWTask.cc:58
edm::ParameterSet
Definition: ParameterSet.h:36
Event.h
edm::ConfigurationDescriptions::setComment
void setComment(std::string const &value)
Definition: ConfigurationDescriptions.cc:48
recoMuon::in
Definition: RecoMuonEnumerators.h:6
cms::cudatest::Thing
Definition: Thing.h:8
edm::Service
Definition: Service.h:30
iEvent
int iEvent
Definition: GenABIO.cc:224
edm::LogVerbatim
Definition: MessageLogger.h:297
cudaCheck.h
TestCUDAProducerGPUEWTask::~TestCUDAProducerGPUEWTask
~TestCUDAProducerGPUEWTask() override=default
edm::stream::EDProducer
Definition: EDProducer.h:38
edm::EventSetup
Definition: EventSetup.h:57
TestCUDAProducerGPUEWTask::gpuAlgo_
TestCUDAProducerGPUKernel gpuAlgo_
Definition: TestCUDAProducerGPUEWTask.cc:42
WaitingTask.h
cms::cuda::device::unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
Definition: device_unique_ptr.h:33
TestCUDAProducerGPUKernel
Definition: TestCUDAProducerGPUKernel.h:19
edm::ParameterSet::getParameter
T getParameter(std::string const &) const
CUDAService.h
cms::cuda::ScopedContextAcquire
Definition: ScopedContext.h:101
Product.h
TestCUDAProducerGPUEWTask::produce
void produce(edm::Event &iEvent, const edm::EventSetup &iSetup) override
Definition: TestCUDAProducerGPUEWTask.cc:120
ContextState.h
eostools.move
def move(src, dest)
Definition: eostools.py:511
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
Frameworkfwd.h
cms::cuda::ScopedContextTask
Definition: ScopedContext.h:192
Exception
Definition: hltDiff.cc:246
ScopedContext.h
Thing.h
ParameterSet.h
edm::Event
Definition: Event.h:73
edm::InputTag
Definition: InputTag.h:15
edm::ConfigurationDescriptions::addWithDefaultLabel
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
Definition: ConfigurationDescriptions.cc:87