CMS 3D CMS Logo

CachedBufAlloc.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
2 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
3 
4 #include <alpaka/alpaka.hpp>
5 
8 
9 namespace cms::alpakatools {
10 
11  namespace traits {
12 
14  template <typename TElem,
15  typename TDim,
16  typename TIdx,
17  typename TDev,
18  typename TQueue,
19  typename = void,
20  typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
21  struct CachedBufAlloc {
22  static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
23  };
24 
26  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
27  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
28  template <typename TExtent>
29  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
30  TQueue queue,
31  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
32  // non-cached, queue-ordered asynchronous host-only memory
33  return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
34  }
35  };
36 
37 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
38 
40  template <typename TElem, typename TDim, typename TIdx>
41  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
42  template <typename TExtent>
43  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
44  alpaka::QueueCudaRtBlocking queue,
45  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
46  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
47 
48  auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
49 
50  // FIXME the BufCpu does not support a pitch ?
51  size_t size = alpaka::getExtentProduct(extent);
52  size_t sizeBytes = size * sizeof(TElem);
53  void* memPtr = allocator.allocate(sizeBytes, queue);
54 
55  // use a custom deleter to return the buffer to the CachingAllocator
56  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
57 
58  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
59  }
60  };
61 
63  template <typename TElem, typename TDim, typename TIdx>
64  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
65  template <typename TExtent>
66  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
67  alpaka::QueueCudaRtNonBlocking queue,
68  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
69  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
70 
71  auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
72 
73  // FIXME the BufCpu does not support a pitch ?
74  size_t size = alpaka::getExtentProduct(extent);
75  size_t sizeBytes = size * sizeof(TElem);
76  void* memPtr = allocator.allocate(sizeBytes, queue);
77 
78  // use a custom deleter to return the buffer to the CachingAllocator
79  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
80 
81  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
82  }
83  };
84 
86  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
87  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
88  template <typename TExtent>
89  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev,
90  TQueue queue,
91  TExtent const& extent) -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
92  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
93 
94  auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
95 
96  size_t width = alpaka::getWidth(extent);
97  size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
98  // TODO implement pitch for TDim > 1
99  size_t pitchBytes = widthBytes;
100  size_t size = alpaka::getExtentProduct(extent);
101  size_t sizeBytes = size * sizeof(TElem);
102  void* memPtr = allocator.allocate(sizeBytes, queue);
103 
104  // use a custom deleter to return the buffer to the CachingAllocator
105  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
106 
107  return alpaka::BufCudaRt<TElem, TDim, TIdx>(
108  dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
109  }
110  };
111 
112 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED
113 
114 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
115 
117  template <typename TElem, typename TDim, typename TIdx>
118  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
119  template <typename TExtent>
120  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
121  alpaka::QueueHipRtBlocking queue,
122  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
123  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
124 
125  auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
126 
127  // FIXME the BufCpu does not support a pitch ?
128  size_t size = alpaka::getExtentProduct(extent);
129  size_t sizeBytes = size * sizeof(TElem);
130  void* memPtr = allocator.allocate(sizeBytes, queue);
131 
132  // use a custom deleter to return the buffer to the CachingAllocator
133  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
134 
135  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
136  }
137  };
138 
140  template <typename TElem, typename TDim, typename TIdx>
141  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
142  template <typename TExtent>
143  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
144  alpaka::QueueHipRtNonBlocking queue,
145  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
146  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
147 
148  auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
149 
150  // FIXME the BufCpu does not support a pitch ?
151  size_t size = alpaka::getExtentProduct(extent);
152  size_t sizeBytes = size * sizeof(TElem);
153  void* memPtr = allocator.allocate(sizeBytes, queue);
154 
155  // use a custom deleter to return the buffer to the CachingAllocator
156  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
157 
158  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
159  }
160  };
161 
163  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
164  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
165  template <typename TExtent>
166  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev,
167  TQueue queue,
168  TExtent const& extent) -> alpaka::BufHipRt<TElem, TDim, TIdx> {
169  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
170 
171  auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
172 
173  size_t width = alpaka::getWidth(extent);
174  size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
175  // TODO implement pitch for TDim > 1
176  size_t pitchBytes = widthBytes;
177  size_t size = alpaka::getExtentProduct(extent);
178  size_t sizeBytes = size * sizeof(TElem);
179  void* memPtr = allocator.allocate(sizeBytes, queue);
180 
181  // use a custom deleter to return the buffer to the CachingAllocator
182  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
183 
184  return alpaka::BufHipRt<TElem, TDim, TIdx>(
185  dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
186  }
187  };
188 
189 #endif // ALPAKA_ACC_GPU_HIP_ENABLED
190 
191  } // namespace traits
192 
193  template <typename TElem,
194  typename TIdx,
195  typename TExtent,
196  typename TQueue,
197  typename TDev,
198  typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
199  ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
200  return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
201  }
202 
203 } // namespace cms::alpakatools
204 
205 #endif // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
size
Write out results.
ALPAKA_FN_HOST auto allocCachedBuf(TDev const &dev, TQueue queue, TExtent const &extent=TExtent())
TEMPL(T2) struct Divides void
Definition: Factorize.h:24
The caching memory allocator trait.
def move(src, dest)
Definition: eostools.py:511
static ALPAKA_FN_HOST auto allocCachedBuf(alpaka::DevCpu const &dev, TQueue queue, TExtent const &extent) -> alpaka::BufCpu< TElem, TDim, TIdx >