d0/dd7/ScopedContext_8cc_source.html

 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"

 #include "FWCore/Concurrency/interface/Async.h"
 #include "FWCore/MessageLogger/interface/MessageLogger.h"
 #include "FWCore/ServiceRegistry/interface/Service.h"
 #include "FWCore/Utilities/interface/Exception.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

 #include "chooseDevice.h"

 namespace cms::cuda {
   namespace impl {
     // Construct a context for an EDM stream: the device is picked with
     // chooseDevice(streamID), made the current CUDA device, and a CUDA
     // stream is then taken from the stream cache.
     ScopedContextBase::ScopedContextBase(edm::StreamID streamID) : currentDevice_(chooseDevice(streamID)) {
       cudaCheck(cudaSetDevice(currentDevice_));
       // Fetched only after cudaSetDevice().
       // NOTE(review): presumably the stream cache depends on the current device -- confirm
       stream_ = getStreamCache().get();
     }

     // Construct a context from an existing data product: the device of the
     // product becomes the current CUDA device, and the CUDA stream is either
     // the one of the product (when the product allows reusing it) or one
     // obtained from the stream cache.
     ScopedContextBase::ScopedContextBase(const ProductBase& data) : currentDevice_(data.device()) {
       cudaCheck(cudaSetDevice(currentDevice_));

       if (not data.mayReuseStream()) {
         // The stream of the product may not be reused, take one from the cache
         stream_ = getStreamCache().get();
         return;
       }

       // Reuse the stream of the product
       stream_ = data.streamPtr();
     }

     // Construct a context from an explicitly given device and CUDA stream.
     // The stream is taken over (moved) as-is, and the given device is made
     // the current CUDA device.
     ScopedContextBase::ScopedContextBase(int device, SharedStreamPtr stream)
         : currentDevice_(device), stream_(std::move(stream)) {
       cudaCheck(cudaSetDevice(currentDevice_));
     }


     // Make the CUDA stream of this context wait for a data product, if needed.
     //
     //  dataDevice  device of the data product; must be equal to device()
     //  dataStream  CUDA stream of the data product
     //  available   true if the data product is already available
     //  dataEvent   CUDA event to wait for when the product is not yet available
     //
     // Throws cms::Exception("LogicError") if the data is on another device.
     void ScopedContextGetterBase::synchronizeStreams(int dataDevice,
                                                      cudaStream_t dataStream,
                                                      bool available,
                                                      cudaEvent_t dataEvent) {
       if (dataDevice != device()) {
         // Eventually replace with prefetch to current device (assuming unified memory works)
         // If we won't go to unified memory, need to figure out something else...
         throw cms::Exception("LogicError") << "Handling data from multiple devices is not yet supported";
       }

       // No synchronization is needed when the data product lives in the
       // stream of this context, or when its event has already occurred.
       const bool sameStream = (dataStream == stream());
       if (sameStream or available) {
         return;
       }

       // Different streams and the event has not yet occurred: make the
       // CUDA stream of this context wait for the event, so that all
       // subsequent work in the stream runs only after the event has
       // "occurred" (i.e. the data product became available).
       cudaCheck(cudaStreamWaitEvent(stream(), dataEvent, 0), "Failed to make a stream to wait for an event");
     }

     void ScopedContextHolderHelper::enqueueCallback(int device, cudaStream_t stream) {
       edm::Service<edm::Async> async;
       SharedEventPtr event = getEventCache().get();
       cudaCheck(cudaEventRecord(event.get(), stream));
       async->runAsync(
           std::move(waitingTaskHolder_),
           [event = std::move(event)]() mutable { cudaCheck(cudaEventSynchronize(event.get())); },
           []() { return "Enqueued by cms::cuda::ScopedContextHolderHelper::enqueueCallback()"; });
     }
   }  // namespace impl


   // Enqueue the callback for the current device and stream via the holder
   // helper and, if a ContextState was given, store the device and the
   // stream in it. Declared noexcept(false), i.e. exceptions may propagate
   // out of this destructor.
   ScopedContextAcquire::~ScopedContextAcquire() noexcept(false) {
     holderHelper_.enqueueCallback(device(), stream());
     // contextState_ is a pointer that may be null (see throwNoState())
     if (contextState_) {
       contextState_->set(device(), streamPtr());
     }
   }

   void ScopedContextAcquire::throwNoState() {
     throw cms::Exception("LogicError")
         << "Calling ScopedContextAcquire::insertNextTask() requires ScopedContextAcquire to be constructed with "
            "ContextState, but that was not the case";
   }


   // Record the CUDA event of this context on its CUDA stream. All
   // asynchronous work must have been queued before the destructor.
   ScopedContextProduce::~ScopedContextProduce() {
     // Intentionally not checking the return value to avoid throwing
     // exceptions. If this call would fail, we should get failures
     // elsewhere as well.
     cudaEventRecord(event_.get(), stream());
   }


   // Enqueue the callback for the current device and stream via the holder helper.
   ScopedContextTask::~ScopedContextTask() { holderHelper_.enqueueCallback(device(), stream()); }
 }  // namespace cms::cuda
edm::Service
Definition: Service.h:30

Exception
Definition: hltDiff.cc:245

MessageLogger.h

cms::cuda::SharedEventPtr
std::shared_ptr< std::remove_pointer_t< cudaEvent_t > > SharedEventPtr
Definition: SharedEventPtr.h:14

Async.h

cms::cuda
Definition: PortableDeviceCollection.h:9

funct::false
false
Definition: Factorize.h:29

cms::cuda::ScopedContextProduce::event_
SharedEventPtr event_
Definition: ScopedContext.h:183

cms::cuda::impl::ScopedContextGetterBase::synchronizeStreams
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
Definition: ScopedContext.cc:35

edm::StreamID
Definition: StreamID.h:30

impl
Definition: trackAlgoPriorityOrder.h:18

std
Definition: JetResolutionObject.h:76

StreamCache.h

cms::cuda::SharedStreamPtr
std::shared_ptr< std::remove_pointer_t< cudaStream_t > > SharedStreamPtr
Definition: SharedStreamPtr.h:14

cms::cuda::stream
uint32_t T const  *__restrict__ uint32_t const  *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

ScopedContext.h

cms::cuda::ScopedContextTask::~ScopedContextTask
~ScopedContextTask()
Definition: ScopedContext.cc:95

cms::cuda::impl::ScopedContextBase::stream_
SharedStreamPtr stream_
Definition: ScopedContext.h:52

chooseDevice.h

cms::cuda::ScopedContextAcquire::contextState_
ContextState * contextState_
Definition: ScopedContext.h:140

cms::cuda::EventCache::get
SharedEventPtr get()
Definition: EventCache.cc:21

cms::cuda::ProductBase
Definition: ProductBase.h:20

edm::Async::runAsync
void runAsync(WaitingTaskWithArenaHolder holder, F &&func, G &&errorContextFunc)
Definition: Async.h:21

cms::cuda::impl::ScopedContextBase::streamPtr
const SharedStreamPtr & streamPtr() const
Definition: ScopedContext.h:35

cms::cuda::chooseDevice
int chooseDevice(edm::StreamID id)
Definition: chooseDevice.cc:8

cms::cuda::ScopedContextTask::holderHelper_
impl::ScopedContextHolderHelper holderHelper_
Definition: ScopedContext.h:212

cms::cuda::StreamCache::get
SharedStreamPtr get()
Definition: StreamCache.cc:20

Service.h

cms::cuda::ScopedContextProduce::~ScopedContextProduce
~ScopedContextProduce()
Record the CUDA event, all asynchronous work must have been queued before the destructor.
Definition: ScopedContext.cc:86

cms::cuda::impl::ScopedContextHolderHelper::waitingTaskHolder_
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
Definition: ScopedContext.h:90

cms::cuda::impl::ScopedContextBase::currentDevice_
int currentDevice_
Definition: ScopedContext.h:51

cms::cuda::ContextState::set
void set(int device, SharedStreamPtr stream)
Definition: ContextState.h:30

Exception.h

cms::cuda::impl::ScopedContextBase::device
int device() const
Definition: ScopedContext.h:28

cudaCheck.h

cms::cuda::ScopedContextAcquire::~ScopedContextAcquire
~ScopedContextAcquire() noexcept(false)
Definition: ScopedContext.cc:71

data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

cms::cuda::getStreamCache
StreamCache & getStreamCache()
Definition: StreamCache.cc:39

cms::cuda::getEventCache
EventCache & getEventCache()
Definition: EventCache.cc:66

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

cms::cuda::impl::ScopedContextBase::stream
cudaStream_t stream() const
Definition: ScopedContext.h:34

cms::cuda::ScopedContextAcquire::holderHelper_
impl::ScopedContextHolderHelper holderHelper_
Definition: ScopedContext.h:139

cms::cuda::impl::ScopedContextHolderHelper::enqueueCallback
void enqueueCallback(int device, cudaStream_t stream)
Definition: ScopedContext.cc:58

eostools.move
def move(src, dest)
Definition: eostools.py:511

event
Definition: event.py:1

cms::cuda::impl::ScopedContextBase::ScopedContextBase
ScopedContextBase(edm::StreamID streamID)
Definition: ScopedContext.cc:14

cms::cuda::ScopedContextAcquire::throwNoState
void throwNoState()
Definition: ScopedContext.cc:78