d0/dd7/ScopedContext_8cc_source.html

 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"

 #include <memory>

 #include "FWCore/MessageLogger/interface/MessageLogger.h"
 #include "FWCore/ServiceRegistry/interface/Service.h"
 #include "FWCore/Utilities/interface/Exception.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

 #include "chooseDevice.h"

 namespace {
   // Heap-allocated payload handed to the CUDA stream callback below through
   // its void* argument.
   struct CallbackData {
     // Task holder that is notified when the callback runs.
     edm::WaitingTaskWithArenaHolder holder;
     // Device number; the callback uses it only in its messages.
     int device;
   };

   // Callback to be registered on a CUDA stream.
   //
   // `data` must point to a CallbackData created with `new`: ownership is taken
   // here and the object is deleted when this function returns. When `status`
   // is cudaSuccess the waiting task is notified without an exception;
   // otherwise it receives a cms::Exception describing the CUDA error.
   void CUDART_CB cudaScopedContextCallback(cudaStream_t streamId, cudaError_t status, void* data) {
     const std::unique_ptr<CallbackData> payload{static_cast<CallbackData*>(data)};

     if (status != cudaSuccess) {
       // The exception is thrown and caught right away so that GDB "catch throw" can break on it
       try {
         const auto errorName = cudaGetErrorName(status);
         const auto errorText = cudaGetErrorString(status);
         throw cms::Exception("CUDAError") << "Callback of CUDA stream " << streamId << " in device "
                                           << payload->device << " error " << errorName << ": " << errorText;
       } catch (cms::Exception&) {
         payload->holder.doneWaiting(std::current_exception());
       }
       return;
     }

     LogTrace("ScopedContext") << " GPU kernel finished (in callback) device " << payload->device << " CUDA stream "
                               << streamId;
     payload->holder.doneWaiting(nullptr);
   }
 }  // namespace

 namespace cms::cuda {
   namespace impl {
     ScopedContextBase::ScopedContextBase(edm::StreamID streamID) : currentDevice_(chooseDevice(streamID)) {
       cudaCheck(cudaSetDevice(currentDevice_));
       stream_ = getStreamCache().get();
     }

     // Construct from an existing product: its device is made current, and its
     // CUDA stream is adopted when the product allows reuse; otherwise a stream
     // is taken from the stream cache.
     ScopedContextBase::ScopedContextBase(const ProductBase& data) : currentDevice_(data.device()) {
       cudaCheck(cudaSetDevice(currentDevice_));

       if (not data.mayReuseStream()) {
         stream_ = getStreamCache().get();
         return;
       }
       stream_ = data.streamPtr();
     }

     ScopedContextBase::ScopedContextBase(int device, SharedStreamPtr stream)
         : currentDevice_(device), stream_(std::move(stream)) {
       cudaCheck(cudaSetDevice(currentDevice_));
     }


     // Order the work of this context's CUDA stream after the event of a data
     // product.
     //
     // dataDevice  device of the data product; must equal device(), otherwise a
     //             cms::Exception("LogicError") is thrown
     // dataStream  CUDA stream of the data product
     // available   true if the product's event has already occurred
     // dataEvent   CUDA event the stream is made to wait for when needed
     void ScopedContextGetterBase::synchronizeStreams(
         int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent) {
       if (dataDevice != device()) {
         // To be replaced eventually with a prefetch to the current device
         // (assuming unified memory works); without unified memory some other
         // solution has to be figured out.
         throw cms::Exception("LogicError") << "Handling data from multiple devices is not yet supported";
       }

       // No synchronization is needed when the data lives in the same stream,
       // or when its event has already occurred.
       if (dataStream == stream() or available) {
         return;
       }

       // Different streams and the event has not occurred yet: make this
       // context's stream wait for the event, so that all work queued in it
       // afterwards runs only once the data product has become available.
       cudaCheck(cudaStreamWaitEvent(stream(), dataEvent, 0), "Failed to make a stream to wait for an event");
     }

     void ScopedContextHolderHelper::enqueueCallback(int device, cudaStream_t stream) {
       cudaCheck(
           cudaStreamAddCallback(stream, cudaScopedContextCallback, new CallbackData{waitingTaskHolder_, device}, 0));
     }
   }  // namespace impl


   // On destruction, enqueue the stream callback and, if a ContextState was
   // provided, store the device and the CUDA stream in it.
   ScopedContextAcquire::~ScopedContextAcquire() {
     holderHelper_.enqueueCallback(device(), stream());

     if (contextState_ == nullptr) {
       return;
     }
     contextState_->set(device(), streamPtr());
   }

   // Always throws a cms::Exception("LogicError") reporting that
   // insertNextTask() was used on a ScopedContextAcquire that was constructed
   // without a ContextState.
   void ScopedContextAcquire::throwNoState() {
     throw cms::Exception("LogicError") << "Calling ScopedContextAcquire::insertNextTask() requires "
                                           "ScopedContextAcquire to be constructed with ContextState, "
                                           "but that was not the case";
   }


   // On destruction, record the CUDA event of this context on its CUDA stream.
   ScopedContextProduce::~ScopedContextProduce() {
     // The return value is deliberately discarded so that no exception is
     // thrown from here; if this call failed, failures would be expected to
     // show up elsewhere as well.
     static_cast<void>(cudaEventRecord(event_.get(), stream()));
   }


   // On destruction, enqueue the stream callback for the current device and
   // CUDA stream through the holder helper.
   ScopedContextTask::~ScopedContextTask() {
     holderHelper_.enqueueCallback(device(), stream());
   }
 }  // namespace cms::cuda
relativeConstraints.error
error
Definition: relativeConstraints.py:53

Exception
Definition: hltDiff.cc:245

MessageLogger.h

cms::cuda
Definition: Product.h:14

cms::cuda::ScopedContextProduce::event_
SharedEventPtr event_
Definition: ScopedContext.h:183

cms::cuda::impl::ScopedContextGetterBase::synchronizeStreams
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
Definition: ScopedContext.cc:62

edm::StreamID
Definition: StreamID.h:30

impl
Definition: trackAlgoPriorityOrder.h:18

std
Definition: JetResolutionObject.h:76

StreamCache.h

cms::cuda::SharedStreamPtr
std::shared_ptr< std::remove_pointer_t< cudaStream_t > > SharedStreamPtr
Definition: SharedStreamPtr.h:14

cms::cuda::stream
uint32_t T const  *__restrict__ uint32_t const  *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

ScopedContext.h

cms::cuda::ScopedContextTask::~ScopedContextTask
~ScopedContextTask()
Definition: ScopedContext.cc:117

cms::cuda::impl::ScopedContextBase::stream_
SharedStreamPtr stream_
Definition: ScopedContext.h:52

LogTrace
#define LogTrace(id)
Definition: MessageLogger.h:234

cms::cuda::ScopedContextAcquire::contextState_
ContextState * contextState_
Definition: ScopedContext.h:140

edm::WaitingTaskWithArenaHolder
Definition: WaitingTaskWithArenaHolder.h:34

cms::cuda::ProductBase
Definition: ProductBase.h:20

edm::WaitingTaskWithArenaHolder::doneWaiting
void doneWaiting(std::exception_ptr iExcept)
Definition: WaitingTaskWithArenaHolder.cc:71

cms::cuda::impl::ScopedContextBase::streamPtr
const SharedStreamPtr & streamPtr() const
Definition: ScopedContext.h:35

chooseDevice.h

cms::cuda::chooseDevice
int chooseDevice(edm::StreamID id)
Definition: chooseDevice.cc:8

cms::cuda::ScopedContextTask::holderHelper_
impl::ScopedContextHolderHelper holderHelper_
Definition: ScopedContext.h:212

mps_update.status
status
Definition: mps_update.py:68

cms::cuda::StreamCache::get
SharedStreamPtr get()
Definition: StreamCache.cc:20

Service.h

cms::cuda::ScopedContextProduce::~ScopedContextProduce
~ScopedContextProduce()
Record the CUDA event, all asynchronous work must have been queued before the destructor.
Definition: ScopedContext.cc:108

cms::cuda::impl::ScopedContextHolderHelper::waitingTaskHolder_
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
Definition: ScopedContext.h:90

cms::cuda::impl::ScopedContextBase::currentDevice_
int currentDevice_
Definition: ScopedContext.h:51

cms::cuda::ContextState::set
void set(int device, SharedStreamPtr stream)
Definition: ContextState.h:30

Exception.h

cms::cuda::ScopedContextAcquire::~ScopedContextAcquire
~ScopedContextAcquire()
Definition: ScopedContext.cc:93

cms::Exception
Definition: Exception.h:70

cms::cuda::impl::ScopedContextBase::device
int device() const
Definition: ScopedContext.h:28

cudaCheck.h

data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79

cms::cuda::getStreamCache
StreamCache & getStreamCache()
Definition: StreamCache.cc:39

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

cms::cuda::impl::ScopedContextBase::stream
cudaStream_t stream() const
Definition: ScopedContext.h:34

cms::cuda::ScopedContextAcquire::holderHelper_
impl::ScopedContextHolderHelper holderHelper_
Definition: ScopedContext.h:139

cms::cuda::impl::ScopedContextHolderHelper::enqueueCallback
void enqueueCallback(int device, cudaStream_t stream)
Definition: ScopedContext.cc:85

eostools.move
def move(src, dest)
Definition: eostools.py:511

cms::cuda::impl::ScopedContextBase::ScopedContextBase
ScopedContextBase(edm::StreamID streamID)
Definition: ScopedContext.cc:41

cms::cuda::ScopedContextAcquire::throwNoState
void throwNoState()
Definition: ScopedContext.cc:100