CMS 3D CMS Logo

List of all members | Public Member Functions | Static Public Member Functions | Private Member Functions | Private Attributes
TestCUDAProducerGPUEWTask Class Reference
Inheritance diagram for TestCUDAProducerGPUEWTask:
edm::stream::EDProducer< edm::ExternalWork >

Public Member Functions

void acquire (edm::Event const &iEvent, edm::EventSetup const &iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override
 
void produce (edm::Event &iEvent, const edm::EventSetup &iSetup) override
 
 TestCUDAProducerGPUEWTask (edm::ParameterSet const &iConfig)
 
 ~TestCUDAProducerGPUEWTask () override=default
 
- Public Member Functions inherited from edm::stream::EDProducer< edm::ExternalWork >
 EDProducer ()=default
 
 EDProducer (const EDProducer &)=delete
 
bool hasAbilityToProduceInBeginLumis () const final
 
bool hasAbilityToProduceInBeginProcessBlocks () const final
 
bool hasAbilityToProduceInBeginRuns () const final
 
bool hasAbilityToProduceInEndLumis () const final
 
bool hasAbilityToProduceInEndProcessBlocks () const final
 
bool hasAbilityToProduceInEndRuns () const final
 
const EDProducer & operator= (const EDProducer &)=delete
 

Static Public Member Functions

static void fillDescriptions (edm::ConfigurationDescriptions &descriptions)
 

Private Member Functions

void addSimpleWork (edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask &ctx)
 

Private Attributes

cms::cuda::ContextState ctxState_
 
cms::cuda::device::unique_ptr< float[]> devicePtr_
 
edm::EDPutTokenT< cms::cuda::Product< cms::cudatest::Thing > > const dstToken_
 
TestCUDAProducerGPUKernel gpuAlgo_
 
cms::cuda::host::noncached::unique_ptr< float > hostData_
 
std::string const label_
 
edm::EDGetTokenT< cms::cuda::Product< cms::cudatest::Thing > > const srcToken_
 

Additional Inherited Members

- Public Types inherited from edm::stream::EDProducer< edm::ExternalWork >
using CacheTypes = CacheContexts< T... >
 
using GlobalCache = typename CacheTypes::GlobalCache
 
using HasAbility = AbilityChecker< T... >
 
using InputProcessBlockCache = typename CacheTypes::InputProcessBlockCache
 
using LuminosityBlockCache = typename CacheTypes::LuminosityBlockCache
 
using LuminosityBlockContext = LuminosityBlockContextT< LuminosityBlockCache, RunCache, GlobalCache >
 
using LuminosityBlockSummaryCache = typename CacheTypes::LuminosityBlockSummaryCache
 
using RunCache = typename CacheTypes::RunCache
 
using RunContext = RunContextT< RunCache, GlobalCache >
 
using RunSummaryCache = typename CacheTypes::RunSummaryCache
 

Detailed Description

Definition at line 24 of file TestCUDAProducerGPUEWTask.cc.

Constructor & Destructor Documentation

◆ TestCUDAProducerGPUEWTask()

TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask ( edm::ParameterSet const &  iConfig)
explicit

Definition at line 48 of file TestCUDAProducerGPUEWTask.cc.

References edm::ParameterSet::getParameter(), and AlCaHLTBitMon_QueryRunRegistry::string.

49  : label_{iConfig.getParameter<std::string>("@module_label")},
50  srcToken_{consumes<cms::cuda::Product<cms::cudatest::Thing>>(iConfig.getParameter<edm::InputTag>("src"))},
51  dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
53  if (cuda and cuda->enabled()) {
54  hostData_ = cms::cuda::make_host_noncached_unique<float>();
55  }
56 }
cms::cuda::host::noncached::unique_ptr< float > hostData_
edm::EDPutTokenT< cms::cuda::Product< cms::cudatest::Thing > > const dstToken_
edm::EDGetTokenT< cms::cuda::Product< cms::cudatest::Thing > > const srcToken_

◆ ~TestCUDAProducerGPUEWTask()

TestCUDAProducerGPUEWTask::~TestCUDAProducerGPUEWTask ( )
override default

Member Function Documentation

◆ acquire()

void TestCUDAProducerGPUEWTask::acquire ( edm::Event const &  iEvent,
edm::EventSetup const &  iSetup,
edm::WaitingTaskWithArenaHolder  waitingTaskHolder 
)
override

Definition at line 71 of file TestCUDAProducerGPUEWTask.cc.

References addSimpleWork(), ctxState_, cudaCheck, devicePtr_, dqmMemoryStats::float, gpuAlgo_, hostData_, caHitNtupletGeneratorKernels::iev, iEvent, recoMuon::in, input, label_, TestCUDAProducerGPUKernel::runAlgo(), and srcToken_.

73  {
74  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire begin event "
75  << iEvent.id().event() << " stream " << iEvent.streamID();
76 
77  auto const& in = iEvent.get(srcToken_);
78  cms::cuda::ScopedContextAcquire ctx{in, waitingTaskHolder, ctxState_};
79 
80  cms::cudatest::Thing const& input = ctx.get(in);
81 
82  devicePtr_ = gpuAlgo_.runAlgo(label_, input.get(), ctx.stream());
83  // Mimick the need to transfer some of the GPU data back to CPU to
84  // be used for something within this module, or to be put in the
85  // event.
86  cudaCheck(
87  cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
88  // Push a task to run addSimpleWork() after the asynchronous work
89  // (and acquire()) has finished instead of produce()
90  ctx.pushNextTask([iev = iEvent.id().event(), istr = iEvent.streamID(), this](cms::cuda::ScopedContextTask ctx) {
91  addSimpleWork(iev, istr, ctx);
92  });
93 
94  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire end event "
95  << iEvent.id().event() << " stream " << iEvent.streamID();
96 }
Log< level::Info, true > LogVerbatim
TkSoAView< TrackerTraits > HitToTuple< TrackerTraits > const *__restrict__ int32_t int32_t int iev
TestCUDAProducerGPUKernel gpuAlgo_
static std::string const input
Definition: EdmProvDump.cc:50
int iEvent
Definition: GenABIO.cc:224
cms::cuda::host::noncached::unique_ptr< float > hostData_
cms::cuda::device::unique_ptr< float[]> devicePtr_
cms::cuda::ContextState ctxState_
edm::EDGetTokenT< cms::cuda::Product< cms::cudatest::Thing > > const srcToken_
cms::cuda::device::unique_ptr< float[]> runAlgo(const std::string &label, cudaStream_t stream) const
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask &ctx)

◆ addSimpleWork()

void TestCUDAProducerGPUEWTask::addSimpleWork ( edm::EventNumber_t  eventID,
edm::StreamID  streamID,
cms::cuda::ScopedContextTask &  ctx 
)
private

Definition at line 98 of file TestCUDAProducerGPUEWTask.cc.

References cudaCheck, devicePtr_, dqmMemoryStats::float, gpuAlgo_, hostData_, label_, cms::cuda::ScopedContextTask::pushNextTask(), TestCUDAProducerGPUKernel::runSimpleAlgo(), and cms::cuda::impl::ScopedContextBase::stream().

Referenced by acquire().

100  {
101  if (*hostData_ < 13) {
102  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
103  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID << " stream " << streamID
104  << " 10th element " << *hostData_ << " not satisfied, queueing more work";
105  cudaCheck(
106  cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
107 
108  ctx.pushNextTask(
109  [eventID, streamID, this](cms::cuda::ScopedContextTask ctx) { addSimpleWork(eventID, streamID, ctx); });
110  gpuAlgo_.runSimpleAlgo(devicePtr_.get(), ctx.stream());
111  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
112  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID << " stream " << streamID;
113  } else {
114  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
115  << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID << " stream " << streamID
116  << " 10th element " << *hostData_ << " not queueing more work";
117  }
118 }
Log< level::Info, true > LogVerbatim
void runSimpleAlgo(float *d_data, cudaStream_t stream) const
TestCUDAProducerGPUKernel gpuAlgo_
cms::cuda::host::noncached::unique_ptr< float > hostData_
cms::cuda::device::unique_ptr< float[]> devicePtr_
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask &ctx)

◆ fillDescriptions()

void TestCUDAProducerGPUEWTask::fillDescriptions ( edm::ConfigurationDescriptions &  descriptions)
static

Definition at line 58 of file TestCUDAProducerGPUEWTask.cc.

References edm::ConfigurationDescriptions::addWithDefaultLabel(), submitPVResolutionJobs::desc, HLT_2022v15_cff::InputTag, and edm::ConfigurationDescriptions::setComment().

58  {
60  desc.add<edm::InputTag>("src", edm::InputTag());
61  descriptions.addWithDefaultLabel(desc);
62  descriptions.setComment(
63  "This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
64  "algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
65  "alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
66  "GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
67  "with the ExternalWork extension and explicit TBB tasks within the module. Produces "
68  "cms::cuda::Product<cms::cudatest::Thing>.");
69 }
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
void setComment(std::string const &value)

◆ produce()

void TestCUDAProducerGPUEWTask::produce ( edm::Event &  iEvent,
const edm::EventSetup &  iSetup 
)
override

Definition at line 120 of file TestCUDAProducerGPUEWTask.cc.

References ctxState_, devicePtr_, dstToken_, Exception, hostData_, iEvent, label_, and eostools::move().

120  {
121  edm::LogVerbatim("TestCUDAProducerGPUEWTask")
122  << label_ << " TestCUDAProducerGPUEWTask::produce begin event " << iEvent.id().event() << " stream "
123  << iEvent.streamID() << " 10th element " << *hostData_;
124  if (*hostData_ != 13) {
125  throw cms::Exception("Assert") << "Expecting 10th element to be 13, got " << *hostData_;
126  }
127 
129 
130  ctx.emplace(iEvent, dstToken_, std::move(devicePtr_));
131 
132  edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::produce end event "
133  << iEvent.id().event() << " stream " << iEvent.streamID();
134 }
Log< level::Info, true > LogVerbatim
int iEvent
Definition: GenABIO.cc:224
cms::cuda::host::noncached::unique_ptr< float > hostData_
cms::cuda::device::unique_ptr< float[]> devicePtr_
edm::EDPutTokenT< cms::cuda::Product< cms::cudatest::Thing > > const dstToken_
cms::cuda::ContextState ctxState_
def move(src, dest)
Definition: eostools.py:511

Member Data Documentation

◆ ctxState_

cms::cuda::ContextState TestCUDAProducerGPUEWTask::ctxState_
private

Definition at line 43 of file TestCUDAProducerGPUEWTask.cc.

Referenced by acquire(), and produce().

◆ devicePtr_

cms::cuda::device::unique_ptr<float[]> TestCUDAProducerGPUEWTask::devicePtr_
private

Definition at line 44 of file TestCUDAProducerGPUEWTask.cc.

Referenced by acquire(), addSimpleWork(), and produce().

◆ dstToken_

edm::EDPutTokenT<cms::cuda::Product<cms::cudatest::Thing> > const TestCUDAProducerGPUEWTask::dstToken_
private

Definition at line 41 of file TestCUDAProducerGPUEWTask.cc.

Referenced by produce().

◆ gpuAlgo_

TestCUDAProducerGPUKernel TestCUDAProducerGPUEWTask::gpuAlgo_
private

Definition at line 42 of file TestCUDAProducerGPUEWTask.cc.

Referenced by acquire(), and addSimpleWork().

◆ hostData_

cms::cuda::host::noncached::unique_ptr<float> TestCUDAProducerGPUEWTask::hostData_
private

Definition at line 45 of file TestCUDAProducerGPUEWTask.cc.

Referenced by acquire(), addSimpleWork(), and produce().

◆ label_

std::string const TestCUDAProducerGPUEWTask::label_
private

◆ srcToken_

edm::EDGetTokenT<cms::cuda::Product<cms::cudatest::Thing> > const TestCUDAProducerGPUEWTask::srcToken_
private

Definition at line 40 of file TestCUDAProducerGPUEWTask.cc.

Referenced by acquire().