CMS 3D CMS Logo

ScopedContext.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDACore_ScopedContext_h
2 #define HeterogeneousCore_CUDACore_ScopedContext_h
3 
4 #include <optional>
5 
16 
17 namespace cms {
18  namespace cudatest {
19  class TestScopedContext;
20  }
21 
22  namespace cuda {
23 
24  namespace impl {
25  // This class is intended to be derived by other ScopedContext*, not for general use
27  public:
28  int device() const { return currentDevice_; }
29 
30  // cudaStream_t is a pointer to a thread-safe object, for which
31  // mutable access is needed even if the ScopedContext itself
32  // would be const. Therefore it is ok to return a non-const
33  // pointer from a const method here.
34  cudaStream_t stream() const { return stream_.get(); }
35  const SharedStreamPtr& streamPtr() const { return stream_; }
36 
37  protected:
38  // The constructors set the current device, but the device
39  // is not set back to the previous value in the destructor. This
40  // should be sufficient (and a tiny bit faster) as all CUDA API
41  // functions relying on the current device should be called from
42  // the scope where this context is. The current device doesn't
43  // really matter between modules (or across TBB tasks).
44  explicit ScopedContextBase(edm::StreamID streamID);
45 
46  explicit ScopedContextBase(const ProductBase& data);
47 
49 
50  private:
53  };
54 
56  public:
57  template <typename T>
58  const T& get(const Product<T>& data) {
59  synchronizeStreams(data.device(), data.stream(), data.isAvailable(), data.event());
60  return data.data_;
61  }
62 
63  template <typename T>
65  return get(iEvent.get(token));
66  }
67 
68  protected:
69  template <typename... Args>
70  ScopedContextGetterBase(Args&&... args) : ScopedContextBase(std::forward<Args>(args)...) {}
71 
72  void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent);
73  };
74 
76  public:
78  : waitingTaskHolder_{std::move(waitingTaskHolder)} {}
79 
80  template <typename F>
81  void pushNextTask(F&& f, ContextState const* state);
82 
84  waitingTaskHolder_ = std::move(waitingTaskHolder);
85  }
86 
87  void enqueueCallback(int device, cudaStream_t stream);
88 
89  private:
91  };
92  } // namespace impl
93 
102  public:
105  : ScopedContextGetterBase(streamID), holderHelper_{std::move(waitingTaskHolder)} {}
106 
109  edm::WaitingTaskWithArenaHolder waitingTaskHolder,
111  : ScopedContextGetterBase(streamID), holderHelper_{std::move(waitingTaskHolder)}, contextState_{&state} {}
112 
115  : ScopedContextGetterBase(data), holderHelper_{std::move(waitingTaskHolder)} {}
116 
119  edm::WaitingTaskWithArenaHolder waitingTaskHolder,
122 
124 
125  template <typename F>
126  void pushNextTask(F&& f) {
127  if (contextState_ == nullptr)
128  throwNoState();
129  holderHelper_.pushNextTask(std::forward<F>(f), contextState_);
130  }
131 
134  }
135 
136  private:
137  void throwNoState();
138 
141  };
142 
150  public:
153 
156 
159  : ScopedContextGetterBase(state.device(), state.releaseStreamPtr()) {}
160 
163 
164  template <typename T>
165  std::unique_ptr<Product<T>> wrap(T data) {
166  // make_unique doesn't work because of private constructor
167  return std::unique_ptr<Product<T>>(new Product<T>(device(), streamPtr(), event_, std::move(data)));
168  }
169 
170  template <typename T, typename... Args>
172  return iEvent.emplace(token, device(), streamPtr(), event_, std::forward<Args>(args)...);
173  }
174 
175  private:
177 
178  // This constructor is only meant for testing
181 
182  // create the CUDA Event upfront to catch possible errors from its creation
184  };
185 
193  public:
196  : ScopedContextBase(state->device(), state->streamPtr()), // don't move, state is re-used afterwards
197  holderHelper_{std::move(waitingTaskHolder)},
198  contextState_{state} {}
199 
201 
202  template <typename F>
203  void pushNextTask(F&& f) {
204  holderHelper_.pushNextTask(std::forward<F>(f), contextState_);
205  }
206 
209  }
210 
211  private:
214  };
215 
223  public:
226  };
227 
228  namespace impl {
229  template <typename F>
231  auto group = waitingTaskHolder_.group();
233  *group,
235  [state, func = std::forward<F>(f)](edm::WaitingTaskWithArenaHolder h) {
237  })});
238  }
239  } // namespace impl
240  } // namespace cuda
241 } // namespace cms
242 
243 #endif
cms::cuda::ScopedContextTask::ScopedContextTask
ScopedContextTask(ContextState const *state, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Constructor to re-use the CUDA stream of acquire() (ExternalWork module)
Definition: ScopedContext.h:195
writedatasetfile.args
args
Definition: writedatasetfile.py:18
cms::cuda::impl::ScopedContextHolderHelper::waitingTaskHolder_
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
Definition: ScopedContext.h:90
edm::StreamID
Definition: StreamID.h:30
cms::cuda::ScopedContextAcquire::holderHelper_
impl::ScopedContextHolderHelper holderHelper_
Definition: ScopedContext.h:139
cms::cuda::ScopedContextTask::contextState_
ContextState const * contextState_
Definition: ScopedContext.h:213
cms::cuda::ScopedContextProduce::ScopedContextProduce
ScopedContextProduce(edm::StreamID streamID)
Constructor to create a new CUDA stream (non-ExternalWork module)
Definition: ScopedContext.h:152
cms::cuda::ScopedContextProduce::emplace
auto emplace(edm::Event &iEvent, edm::EDPutTokenT< T > token, Args &&... args)
Definition: ScopedContext.h:171
cms::cuda::impl::ScopedContextGetterBase::get
const T & get(const Product< T > &data)
Definition: ScopedContext.h:58
cms::cuda::ScopedContextAcquire::throwNoState
void throwNoState()
Definition: ScopedContext.cc:100
cms::cuda::ScopedContextAcquire::ScopedContextAcquire
ScopedContextAcquire(edm::StreamID streamID, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Constructor to create a new CUDA stream (no need for context beyond acquire())
Definition: ScopedContext.h:104
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
cms::cuda::SharedEventPtr
std::shared_ptr< std::remove_pointer_t< cudaEvent_t > > SharedEventPtr
Definition: SharedEventPtr.h:14
cms::cuda::ScopedContextProduce
Definition: ScopedContext.h:149
edm::EDGetTokenT
Definition: EDGetToken.h:33
cms::cuda::ScopedContextTask::holderHelper_
impl::ScopedContextHolderHelper holderHelper_
Definition: ScopedContext.h:212
cms::cuda::ScopedContextTask::pushNextTask
void pushNextTask(F &&f)
Definition: ScopedContext.h:203
edm::EDPutTokenT
Definition: EDPutToken.h:33
cms::cuda::stream
cudaStream_t stream
Definition: HistoContainer.h:57
cms::cuda::impl::ScopedContextBase::stream
cudaStream_t stream() const
Definition: ScopedContext.h:34
cms::cuda::SharedStreamPtr
std::shared_ptr< std::remove_pointer_t< cudaStream_t > > SharedStreamPtr
Definition: SharedStreamPtr.h:14
cms::cuda::ScopedContextAnalyze
Definition: ScopedContext.h:222
cms::cuda::impl::ScopedContextHolderHelper::enqueueCallback
void enqueueCallback(int device, cudaStream_t stream)
Definition: ScopedContext.cc:85
cms::cuda::impl::ScopedContextHolderHelper::replaceWaitingTaskHolder
void replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Definition: ScopedContext.h:83
edm::make_waiting_task_with_holder
auto make_waiting_task_with_holder(WaitingTaskWithArenaHolder h, F &&f)
Definition: WaitingTaskWithArenaHolder.h:104
cms::cuda::ScopedContextAnalyze::ScopedContextAnalyze
ScopedContextAnalyze(const ProductBase &data)
Constructor to (possibly) re-use a CUDA stream.
Definition: ScopedContext.h:225
cms::cuda::impl::ScopedContextGetterBase::synchronizeStreams
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
Definition: ScopedContext.cc:62
SiPixelRawToDigi_cfi.cuda
cuda
Definition: SiPixelRawToDigi_cfi.py:14
cms::cuda::ScopedContextAcquire::contextState_
ContextState * contextState_
Definition: ScopedContext.h:140
cms::cuda::impl::ScopedContextHolderHelper::pushNextTask
void pushNextTask(F &&f, ContextState const *state)
Definition: ScopedContext.h:230
cms::cuda::ScopedContextTask::replaceWaitingTaskHolder
void replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Definition: ScopedContext.h:207
cms::cuda::ScopedContextProduce::event_
SharedEventPtr event_
Definition: ScopedContext.h:183
F
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
cms::cuda::impl::ScopedContextGetterBase::ScopedContextGetterBase
ScopedContextGetterBase(Args &&... args)
Definition: ScopedContext.h:70
edm::WaitingTaskWithArenaHolder::group
tbb::task_group * group() const
Definition: WaitingTaskWithArenaHolder.h:83
edm::WaitingTaskWithArenaHolder
Definition: WaitingTaskWithArenaHolder.h:34
cms::cuda::EventCache::get
SharedEventPtr get()
Definition: EventCache.cc:21
cms::cuda::ScopedContextAcquire::pushNextTask
void pushNextTask(F &&f)
Definition: ScopedContext.h:126
cms::cuda::ScopedContextTask::~ScopedContextTask
~ScopedContextTask()
Definition: ScopedContext.cc:117
cms::cuda::impl::ScopedContextBase::streamPtr
const SharedStreamPtr & streamPtr() const
Definition: ScopedContext.h:35
cms::cuda::func
cudaStream_t T uint32_t const T *__restrict__ const uint32_t *__restrict__ uint32_t int cudaStream_t Func __host__ __device__ V int Func func
Definition: HistoContainer.h:124
cms::cuda::impl::ScopedContextBase::stream_
SharedStreamPtr stream_
Definition: ScopedContext.h:52
cms::cuda::ScopedContextProduce::wrap
std::unique_ptr< Product< T > > wrap(T data)
Definition: ScopedContext.h:165
h
EDGetToken.h
cms::cuda::impl::ScopedContextBase::currentDevice_
int currentDevice_
Definition: ScopedContext.h:51
cms::cuda::ContextState
Definition: ContextState.h:15
cms::cuda::ProductBase
Definition: ProductBase.h:20
Event.h
WaitingTaskWithArenaHolder.h
SharedStreamPtr.h
EDPutToken.h
cms::cuda::impl::ScopedContextBase
Definition: ScopedContext.h:26
cms::cuda::ScopedContextAcquire::ScopedContextAcquire
ScopedContextAcquire(edm::StreamID streamID, edm::WaitingTaskWithArenaHolder waitingTaskHolder, ContextState &state)
Constructor to create a new CUDA stream, and the context is needed after acquire()
Definition: ScopedContext.h:108
iEvent
int iEvent
Definition: GenABIO.cc:224
cms::cuda::ScopedContextProduce::~ScopedContextProduce
~ScopedContextProduce()
Record the CUDA event; all asynchronous work must have been queued before the destructor runs.
Definition: ScopedContext.cc:108
cms::cuda::impl::ScopedContextGetterBase
Definition: ScopedContext.h:55
cms::cuda::ScopedContextProduce::ScopedContextProduce
ScopedContextProduce(const ProductBase &data)
Constructor to (possibly) re-use a CUDA stream (non-ExternalWork module)
Definition: ScopedContext.h:155
cms::cuda::impl::ScopedContextBase::ScopedContextBase
ScopedContextBase(edm::StreamID streamID)
Definition: ScopedContext.cc:41
cms::cuda::ScopedContextAcquire
Definition: ScopedContext.h:101
cms::cuda::impl::ScopedContextHolderHelper
Definition: ScopedContext.h:75
Product.h
cms::cuda::ScopedContextAcquire::ScopedContextAcquire
ScopedContextAcquire(const ProductBase &data, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Constructor to (possibly) re-use a CUDA stream (no need for context beyond acquire())
Definition: ScopedContext.h:114
ContextState.h
impl
Definition: trackAlgoPriorityOrder.h:18
eostools.move
def move(src, dest)
Definition: eostools.py:511
std
Definition: JetResolutionObject.h:76
cms::cuda::ScopedContextProduce::ScopedContextProduce
ScopedContextProduce(int device, SharedStreamPtr stream, SharedEventPtr event)
Definition: ScopedContext.h:179
cms::cuda::ScopedContextProduce::ScopedContextProduce
ScopedContextProduce(ContextState &state)
Constructor to re-use the CUDA stream of acquire() (ExternalWork module)
Definition: ScopedContext.h:158
cms::cuda::ScopedContextAcquire::~ScopedContextAcquire
~ScopedContextAcquire()
Definition: ScopedContext.cc:93
RunInfoPI::state
state
Definition: RunInfoPayloadInspectoHelper.h:16
cms::cuda::ScopedContextProduce::TestScopedContext
friend class cudatest::TestScopedContext
Definition: ScopedContext.h:176
cms::cuda::ScopedContextAcquire::ScopedContextAcquire
ScopedContextAcquire(const ProductBase &data, edm::WaitingTaskWithArenaHolder waitingTaskHolder, ContextState &state)
Constructor to (possibly) re-use a CUDA stream, and the context is needed after acquire()
Definition: ScopedContext.h:118
T
long double T
Definition: Basic3DVectorLD.h:48
cms::cuda::impl::ScopedContextHolderHelper::ScopedContextHolderHelper
ScopedContextHolderHelper(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Definition: ScopedContext.h:77
cms::cuda::Product
Definition: Product.h:34
cms::cuda::impl::ScopedContextGetterBase::get
const T & get(const edm::Event &iEvent, edm::EDGetTokenT< Product< T >> token)
Definition: ScopedContext.h:64
cms::cuda::ScopedContextTask
Definition: ScopedContext.h:192
EventCache.h
SharedEventPtr.h
data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
cms::cuda::ScopedContextAcquire::replaceWaitingTaskHolder
void replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Definition: ScopedContext.h:132
event
Definition: event.py:1
edm::Event
Definition: Event.h:73
StreamID.h
cms::cuda::getEventCache
EventCache & getEventCache()
Definition: EventCache.cc:64
cms::cuda::impl::ScopedContextBase::device
int device() const
Definition: ScopedContext.h:28
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21
watchdog.group
group
Definition: watchdog.py:82
unpackBuffers-CaloStage2.token
token
Definition: unpackBuffers-CaloStage2.py:316