51 dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
54 hostData_ = cms::cuda::make_host_noncached_unique<float>();
63 "This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
64 "algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
65 "alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
66 "GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
67 "with the ExternalWork extension and explicit TBB tasks within the module. Produces "
68 "cms::cuda::Product<cms::cudatest::Thing>.");
74 edm::LogVerbatim(
"TestCUDAProducerGPUEWTask") <<
label_ <<
" TestCUDAProducerGPUEWTask::acquire begin event "
87 cudaMemcpyAsync(
hostData_.get(),
devicePtr_.get() + 10,
sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
94 edm::LogVerbatim(
"TestCUDAProducerGPUEWTask") <<
label_ <<
" TestCUDAProducerGPUEWTask::acquire end event "
103 <<
label_ <<
" TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID <<
" stream " << streamID
104 <<
" 10th element " << *
hostData_ <<
" not satisfied, queueing more work";
106 cudaMemcpyAsync(hostData_.get(),
devicePtr_.get() + 10,
sizeof(float), cudaMemcpyDeviceToHost, ctx.
stream()));
112 <<
label_ <<
" TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID <<
" stream " << streamID;
115 <<
label_ <<
" TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID <<
" stream " << streamID
116 <<
" 10th element " << *
hostData_ <<
" not queueing more work";
122 <<
label_ <<
" TestCUDAProducerGPUEWTask::produce begin event " << iEvent.
id().
event() <<
" stream "
132 edm::LogVerbatim(
"TestCUDAProducerGPUEWTask") <<
label_ <<
" TestCUDAProducerGPUEWTask::produce end event "
Log< level::Info, true > LogVerbatim
EventNumber_t event() const
void runSimpleAlgo(float *d_data, cudaStream_t stream) const
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
const float * get() const
#define DEFINE_FWK_MODULE(type)
unique_ptr< ClusterSequence > cs
unsigned long long EventNumber_t
~TestCUDAProducerGPUEWTask() override=default
TestCUDAProducerGPUKernel gpuAlgo_
std::unique_ptr< T, impl::HostDeleter > unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
static std::string const input
cms::cuda::device::unique_ptr< float[]> runAlgo(const std::string &label, cudaStream_t stream) const
cms::cuda::host::noncached::unique_ptr< float > hostData_
bool get(ProductID const &oid, Handle< PROD > &result) const
cms::cuda::device::unique_ptr< float[]> devicePtr_
TestCUDAProducerGPUEWTask(edm::ParameterSet const &iConfig)
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void acquire(edm::Event const &iEvent, edm::EventSetup const &iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override
edm::EDPutTokenT< cms::cuda::Product< cms::cudatest::Thing > > const dstToken_
void setComment(std::string const &value)
cms::cuda::ContextState ctxState_
T getParameter(std::string const &) const
HitContainer const *__restrict__ TkSoA const *__restrict__ Quality const *__restrict__ CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ int32_t int32_t int iev
cudaStream_t stream() const
edm::EDGetTokenT< cms::cuda::Product< cms::cudatest::Thing > > const srcToken_
StreamID streamID() const
#define cudaCheck(ARG,...)
void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask &ctx)
void produce(edm::Event &iEvent, const edm::EventSetup &iSetup) override