CMS 3D CMS Logo

ESProduct.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDACore_ESProduct_h
2 #define HeterogeneousCore_CUDACore_ESProduct_h
3 
4 #include <atomic>
5 #include <cassert>
6 #include <mutex>
7 #include <vector>
8 
15 
16 namespace cms {
17  namespace cuda {
18  template <typename T>
19  class ESProduct {
20  public:
22  for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) {
23  gpuDataPerDevice_[i].m_event = getEventCache().get();
24  }
25  }
26  ~ESProduct() = default;
27 
28  // transferAsync should be a function of (T&, cudaStream_t)
29  // which enqueues asynchronous transfers (possibly kernels as well)
30  // to the CUDA stream
31  template <typename F>
32  const T& dataForCurrentDeviceAsync(cudaStream_t cudaStream, F transferAsync) const {
33  auto device = currentDevice();
34 
35  auto& data = gpuDataPerDevice_[device];
36 
37  // If GPU data has already been filled, we can return it
38  // immediately
39  if (not data.m_filled.load()) {
40  // It wasn't, so need to fill it
41  std::scoped_lock<std::mutex> lk{data.m_mutex};
42 
43  if (data.m_filled.load()) {
44  // Other thread marked it filled while we were locking the mutex, so we're free to return it
45  return data.m_data;
46  }
47 
48  if (data.m_fillingStream != nullptr) {
49  // Someone else is filling
50 
51  // Check first if the recorded event has occurred
52  if (eventWorkHasCompleted(data.m_event.get())) {
53  // It was, so data is accessible from all CUDA streams on
54  // the device. Set the 'filled' for all subsequent calls and
55  // return the value
56  auto should_be_false = data.m_filled.exchange(true);
57  assert(not should_be_false);
58  data.m_fillingStream = nullptr;
59  } else if (data.m_fillingStream != cudaStream) {
60  // Filling is still going on. For other CUDA stream, add
61  // wait on the CUDA stream and return the value. Subsequent
62  // work queued on the stream will wait for the event to
63  // occur (i.e. transfer to finish).
64  cudaCheck(cudaStreamWaitEvent(cudaStream, data.m_event.get(), 0),
65  "Failed to make a stream to wait for an event");
66  }
67  // else: filling is still going on. But for the same CUDA
68  // stream (which would be a bit strange but fine), we can just
69  // return as all subsequent work should be enqueued to the
70  // same CUDA stream (or stream to be explicitly synchronized
71  // by the caller)
72  } else {
73  // Now we can be sure that the data is not yet on the GPU, and
74  // this thread is the first to try that.
75  transferAsync(data.m_data, cudaStream);
76  assert(data.m_fillingStream == nullptr);
77  data.m_fillingStream = cudaStream;
78  // Now the filling has been enqueued to the cudaStream, so we
79  // can return the GPU data immediately, since all subsequent
80  // work must be either enqueued to the cudaStream, or the cudaStream
81  // must be synchronized by the caller
82  }
83  }
84 
85  return data.m_data;
86  }
87 
88  private:
89  struct Item {
92  // non-null if some thread is already filling (cudaStream_t is just a pointer)
93  CMS_THREAD_GUARD(m_mutex) mutable cudaStream_t m_fillingStream = nullptr;
94  mutable std::atomic<bool> m_filled = false; // easy check if data has been filled already or not
96  };
97 
98  std::vector<Item> gpuDataPerDevice_;
99  };
100  } // namespace cuda
101 } // namespace cms
102 
103 #endif
mps_fire.i
i
Definition: mps_fire.py:428
cms::cuda::ESProduct::Item::m_data
T m_data
Definition: ESProduct.h:95
cms::cuda::SharedEventPtr
std::shared_ptr< std::remove_pointer_t< cudaEvent_t > > SharedEventPtr
Definition: SharedEventPtr.h:14
cms::cuda::ESProduct::Item
Definition: ESProduct.h:89
cms::cuda::assert
assert(be >=bs)
numberOfDevices.h
F
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
cms::cuda::ESProduct::~ESProduct
~ESProduct()=default
cms::cuda::EventCache::get
SharedEventPtr get()
Definition: EventCache.cc:21
cms::cuda::ESProduct::dataForCurrentDeviceAsync
const T & dataForCurrentDeviceAsync(cudaStream_t cudaStream, F transferAsync) const
Definition: ESProduct.h:32
cms::cuda::numberOfDevices
int numberOfDevices()
Definition: numberOfDevices.cc:6
cms::cuda::ESProduct
Definition: ESProduct.h:19
prod1Switch_cff.cuda
cuda
Definition: prod1Switch_cff.py:11
cms::cuda::currentDevice
int currentDevice()
Definition: currentDevice.h:10
cms::cuda::ESProduct::Item::m_filled
std::atomic< bool > m_filled
Definition: ESProduct.h:94
cms::cuda::ESProduct::Item::m_fillingStream
cudaStream_t m_fillingStream
Definition: ESProduct.h:93
cms::cuda::eventWorkHasCompleted
bool eventWorkHasCompleted(cudaEvent_t event)
Definition: eventWorkHasCompleted.h:18
cms::cuda::ESProduct::Item::m_mutex
std::mutex m_mutex
Definition: ESProduct.h:90
thread_safety_macros.h
cms::cuda::ESProduct::ESProduct
ESProduct()
Definition: ESProduct.h:21
mutex
static std::mutex mutex
Definition: Proxy.cc:8
cudaCheck.h
cms::cuda::ESProduct::Item::m_event
SharedEventPtr m_event
Definition: ESProduct.h:91
eventWorkHasCompleted.h
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
T
long double T
Definition: Basic3DVectorLD.h:48
EventCache.h
cms::cuda::ESProduct::gpuDataPerDevice_
std::vector< Item > gpuDataPerDevice_
Definition: ESProduct.h:98
currentDevice.h
data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
CMS_THREAD_GUARD
#define CMS_THREAD_GUARD(_var_)
Definition: thread_safety_macros.h:6
cms::cuda::getEventCache
EventCache & getEventCache()
Definition: EventCache.cc:64
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21