CMS 3D CMS Logo

ScopedContext.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDACore_ScopedContext_h
2 #define HeterogeneousCore_CUDACore_ScopedContext_h
3 
4 #include <optional>
5 
16 
17 namespace cms {
18  namespace cudatest {
19  class TestScopedContext;
20  }
21 
22  namespace cuda {
23 
24  namespace impl {
25  // This class is intended to be derived by other ScopedContext*, not for general use
27  public:
28  int device() const { return currentDevice_; }
29 
30  // cudaStream_t is a pointer to a thread-safe object, for which a
31  // mutable access is needed even if the ScopedContext itself
32  // would be const. Therefore it is ok to return a non-const
33  // pointer from a const method here.
34  cudaStream_t stream() const { return stream_.get(); }
35  const SharedStreamPtr& streamPtr() const { return stream_; }
36 
37  protected:
38  // The constructors set the current device, but the device
39  // is not set back to the previous value at the destructor. This
40 // should be sufficient (and a tiny bit faster) as all CUDA API
41  // functions relying on the current device should be called from
42  // the scope where this context is. The current device doesn't
43  // really matter between modules (or across TBB tasks).
44  explicit ScopedContextBase(edm::StreamID streamID);
45 
46  explicit ScopedContextBase(const ProductBase& data);
47 
49 
50  private:
53  };
54 
56  public:
57  template <typename T>
58  const T& get(const Product<T>& data) {
59  synchronizeStreams(data.device(), data.stream(), data.isAvailable(), data.event());
60  return data.data_;
61  }
62 
63  template <typename T>
65  return get(iEvent.get(token));
66  }
67 
68  protected:
69  template <typename... Args>
70  ScopedContextGetterBase(Args&&... args) : ScopedContextBase(std::forward<Args>(args)...) {}
71 
72  void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent);
73  };
74 
76  public:
78  : waitingTaskHolder_{std::move(waitingTaskHolder)} {}
79 
80  template <typename F>
81  void pushNextTask(F&& f, ContextState const* state);
82 
84  waitingTaskHolder_ = std::move(waitingTaskHolder);
85  }
86 
87  void enqueueCallback(int device, cudaStream_t stream);
88 
89  private:
91  };
92  } // namespace impl
93 
102  public:
105  : ScopedContextGetterBase(streamID), holderHelper_{std::move(waitingTaskHolder)} {}
106 
109  edm::WaitingTaskWithArenaHolder waitingTaskHolder,
111  : ScopedContextGetterBase(streamID), holderHelper_{std::move(waitingTaskHolder)}, contextState_{&state} {}
112 
115  : ScopedContextGetterBase(data), holderHelper_{std::move(waitingTaskHolder)} {}
116 
119  edm::WaitingTaskWithArenaHolder waitingTaskHolder,
122 
124 
125  template <typename F>
126  void pushNextTask(F&& f) {
127  if (contextState_ == nullptr)
128  throwNoState();
129  holderHelper_.pushNextTask(std::forward<F>(f), contextState_);
130  }
131 
134  }
135 
136  private:
137  void throwNoState();
138 
141  };
142 
150  public:
153 
156 
159  : ScopedContextGetterBase(state.device(), state.releaseStreamPtr()) {}
160 
163 
164  template <typename T>
165  std::unique_ptr<Product<T>> wrap(T data) {
166  // make_unique doesn't work because of private constructor
167  return std::unique_ptr<Product<T>>(new Product<T>(device(), streamPtr(), event_, std::move(data)));
168  }
169 
170  template <typename T, typename... Args>
172  return iEvent.emplace(token, device(), streamPtr(), event_, std::forward<Args>(args)...);
173  }
174 
175  private:
177 
178 // This constructor is only meant for testing
181 
182  // create the CUDA Event upfront to catch possible errors from its creation
184  };
185 
193  public:
196  : ScopedContextBase(state->device(), state->streamPtr()), // don't move, state is re-used afterwards
197  holderHelper_{std::move(waitingTaskHolder)},
198  contextState_{state} {}
199 
201 
202  template <typename F>
203  void pushNextTask(F&& f) {
204  holderHelper_.pushNextTask(std::forward<F>(f), contextState_);
205  }
206 
209  }
210 
211  private:
214  };
215 
223  public:
226  };
227 
228  namespace impl {
229  template <typename F>
231  auto group = waitingTaskHolder_.group();
233  *group,
235  [state, func = std::forward<F>(f)](edm::WaitingTaskWithArenaHolder h) {
237  })});
238  }
239  } // namespace impl
240  } // namespace cuda
241 } // namespace cms
242 
243 #endif
ScopedContextAcquire(edm::StreamID streamID, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Constructor to create a new CUDA stream (no need for context beyond acquire())
ScopedContextTask(ContextState const *state, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Constructor to re-use the CUDA stream of acquire() (ExternalWork module)
std::shared_ptr< std::remove_pointer_t< cudaEvent_t > > SharedEventPtr
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
ScopedContextHolderHelper(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Definition: ScopedContext.h:77
void pushNextTask(F &&f, ContextState const *state)
std::shared_ptr< std::remove_pointer_t< cudaStream_t > > SharedStreamPtr
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
ScopedContextAnalyze(const ProductBase &data)
Constructor to (possibly) re-use a CUDA stream.
void replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
void replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Definition: ScopedContext.h:83
int iEvent
Definition: GenABIO.cc:224
SharedEventPtr get()
Definition: EventCache.cc:21
oneapi::tbb::task_group * group() const
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int Func func
const SharedStreamPtr & streamPtr() const
Definition: ScopedContext.h:35
ContextState const * contextState_
ScopedContextProduce(const ProductBase &data)
Constructor to (possibly) re-use a CUDA stream (non-ExternalWork module)
impl::ScopedContextHolderHelper holderHelper_
double f[11][100]
~ScopedContextProduce()
Record the CUDA event, all asynchronous work must have been queued before the destructor.
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
Definition: ScopedContext.h:90
Namespace of DDCMS conversion namespace.
std::unique_ptr< Product< T > > wrap(T data)
ScopedContextAcquire(edm::StreamID streamID, edm::WaitingTaskWithArenaHolder waitingTaskHolder, ContextState &state)
Constructor to create a new CUDA stream, and the context is needed after acquire() ...
ScopedContextAcquire(const ProductBase &data, edm::WaitingTaskWithArenaHolder waitingTaskHolder, ContextState &state)
Constructor to (possibly) re-use a CUDA stream, and the context is needed after acquire() ...
auto make_waiting_task_with_holder(WaitingTaskWithArenaHolder h, F &&f)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
ScopedContextProduce(int device, SharedStreamPtr stream, SharedEventPtr event)
ScopedContextProduce(ContextState &state)
Constructor to re-use the CUDA stream of acquire() (ExternalWork module)
void replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder waitingTaskHolder)
ScopedContextAcquire(const ProductBase &data, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
Constructor to (possibly) re-use a CUDA stream (no need for context beyond acquire()) ...
friend class cudatest::TestScopedContext
EventCache & getEventCache()
Definition: EventCache.cc:64
ScopedContextProduce(edm::StreamID streamID)
Constructor to create a new CUDA stream (non-ExternalWork module)
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
long double T
impl::ScopedContextHolderHelper holderHelper_
auto emplace(edm::Event &iEvent, edm::EDPutTokenT< T > token, Args &&... args)
void enqueueCallback(int device, cudaStream_t stream)
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1
ScopedContextBase(edm::StreamID streamID)