
ESProduct.h
#ifndef HeterogeneousCore_CUDACore_ESProduct_h
#define HeterogeneousCore_CUDACore_ESProduct_h

#include <atomic>
#include <cassert>
#include <mutex>
#include <vector>

#include "FWCore/Utilities/interface/thread_safety_macros.h"
#include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h"
#include "HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
#include "HeterogeneousCore/CUDAUtilities/interface/eventWorkHasCompleted.h"
#include "HeterogeneousCore/CUDAUtilities/interface/numberOfDevices.h"
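
// ESProduct<T> caches one copy of an EventSetup data product per CUDA
// device, filling it asynchronously on first use and sharing it across
// all CUDA streams of that device (see dataForCurrentDeviceAsync() below).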
namespace cms {
  namespace cuda {
    template <typename T>
    class ESProduct {
    public:
      // Allocate one slot per device and attach a CUDA event to each slot
      ESProduct() : gpuDataPerDevice_(numberOfDevices()) {
        if (not gpuDataPerDevice_.empty()) {
          cms::cuda::ScopedSetDevice scopedDevice;
          for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) {
            scopedDevice.set(i);
            gpuDataPerDevice_[i].m_event = getEventCache().get();
          }
        }
      }

      ~ESProduct() = default;

      // transferAsync should be a function of (T&, cudaStream_t)
      // which enqueues asynchronous transfers (possibly kernels as well)
      // to the CUDA stream
      template <typename F>
      const T& dataForCurrentDeviceAsync(cudaStream_t cudaStream, F transferAsync) const {
        int device = currentDevice();
        auto& data = gpuDataPerDevice_[device];

        // If the GPU data has already been filled, we can return it immediately
        if (not data.m_filled.load()) {
          // It wasn't, so we need to fill it
          std::scoped_lock<std::mutex> lk{data.m_mutex};

          if (data.m_filled.load()) {
            // Another thread marked it filled while we were locking the mutex, so we're free to return it
            return data.m_data;
          }

          if (data.m_fillingStream != nullptr) {
            // Someone else is filling

            // Check first if the recorded event has occurred
            if (eventWorkHasCompleted(data.m_event.get())) {
              // It has, so the data is accessible from all CUDA streams on
              // the device. Set 'filled' for all subsequent calls and
              // return the value
              auto should_be_false = data.m_filled.exchange(true);
              assert(not should_be_false);
              data.m_fillingStream = nullptr;
            } else if (data.m_fillingStream != cudaStream) {
              // Filling is still going on. For any other CUDA stream, add
              // a wait on that stream and return the value. Subsequent
              // work queued on the stream will wait for the event to
              // occur (i.e. for the transfer to finish).
              cudaCheck(cudaStreamWaitEvent(cudaStream, data.m_event.get(), 0),
                        "Failed to make a stream to wait for an event");
            }
            // else: filling is still going on, but in the same CUDA
            // stream (which would be a bit strange, but is fine); we can
            // just return, as all subsequent work should be enqueued to
            // the same CUDA stream (or that stream must be explicitly
            // synchronized by the caller)
          } else {
            // Now we can be sure that the data is not yet on the GPU, and
            // this thread is the first to try to fill it
            transferAsync(data.m_data, cudaStream);
            assert(data.m_fillingStream == nullptr);
            data.m_fillingStream = cudaStream;
            // Record in the cudaStream an event to mark the readiness of the
            // EventSetup data on the GPU, so other streams can check for it
            cudaCheck(cudaEventRecord(data.m_event.get(), cudaStream));
            // Now the filling has been enqueued to the cudaStream, so we
            // can return the GPU data immediately, since all subsequent
            // work must either be enqueued to the cudaStream or the
            // cudaStream must be synchronized by the caller
          }
        }

        return data.m_data;
      }

    private:
      struct Item {
        mutable std::mutex m_mutex;
        CMS_THREAD_GUARD(m_mutex) mutable SharedEventPtr m_event;
        // non-null if some thread is already filling (cudaStream_t is just a pointer)
        CMS_THREAD_GUARD(m_mutex) mutable cudaStream_t m_fillingStream = nullptr;
        mutable std::atomic<bool> m_filled = false;  // easy check whether the data has been filled already or not
        CMS_THREAD_GUARD(m_mutex) mutable T m_data;
      };

      std::vector<Item> gpuDataPerDevice_;
    };
  }  // namespace cuda
}  // namespace cms

#endif  // HeterogeneousCore_CUDACore_ESProduct_h
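
A minimal usage sketch, not part of this file: a conditions wrapper owns host-side data and exposes the per-device copy through an ESProduct. Only dataForCurrentDeviceAsync() and its transfer-callback contract come from the header above; the DeviceData payload, the MyConditionsGPU wrapper, and all member names are hypothetical, invented for illustration.

#include <utility>
#include <vector>

#include "HeterogeneousCore/CUDACore/interface/ESProduct.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

struct DeviceData {           // hypothetical device-side payload
  float* values = nullptr;    // device pointer; never freed here, for brevity
  int size = 0;
};

class MyConditionsGPU {       // hypothetical wrapper, not part of CMSSW
public:
  explicit MyConditionsGPU(std::vector<float> values) : hostValues_(std::move(values)) {}

  // Returns the device copy for the current device, filling it on first
  // use. The lambda runs at most once per device; concurrent callers on
  // other streams get a cudaStreamWaitEvent() on the recorded event.
  const DeviceData& getGPUData(cudaStream_t stream) const {
    return product_.dataForCurrentDeviceAsync(stream, [this](DeviceData& data, cudaStream_t s) {
      data.size = static_cast<int>(hostValues_.size());
      cudaCheck(cudaMalloc(&data.values, data.size * sizeof(float)));
      // Asynchronous copy: guaranteed complete only for work queued on
      // 's', or once the event recorded by ESProduct has occurred
      cudaCheck(cudaMemcpyAsync(
          data.values, hostValues_.data(), data.size * sizeof(float), cudaMemcpyHostToDevice, s));
    });
  }

private:
  std::vector<float> hostValues_;             // host-side source of the transfer
  cms::cuda::ESProduct<DeviceData> product_;  // one copy per CUDA device
};

Work that consumes the returned DeviceData must be queued on the same stream, or synchronize that stream explicitly, exactly as the comments in dataForCurrentDeviceAsync() require.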