CMS 3D CMS Logo

CachedBufAlloc.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
2 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
3 
4 #include <alpaka/alpaka.hpp>
5 
9 
10 namespace cms::alpakatools {
11 
12  namespace traits {
13 
15  template <typename TElem,
16  typename TDim,
17  typename TIdx,
18  typename TDev,
19  typename TQueue,
20  typename = void,
21  typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
22  struct CachedBufAlloc {
23  static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
24  };
25 
27  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
28  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
29  template <typename TExtent>
30  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
31  -> alpaka::BufCpu<TElem, TDim, TIdx> {
32  // non-cached, queue-ordered asynchronous host-only memory
33  return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
34  }
35  };
36 
37 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
38 
40  template <typename TElem, typename TDim, typename TIdx>
41  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
42  template <typename TExtent>
43  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
44  alpaka::QueueCudaRtBlocking queue,
45  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
46  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
47 
48  auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
49 
50  // FIXME the BufCpu does not support a pitch ?
51  size_t size = alpaka::getExtentProduct(extent);
52  size_t sizeBytes = size * sizeof(TElem);
53  void* memPtr = allocator.allocate(sizeBytes, queue);
54 
55  // use a custom deleter to return the buffer to the CachingAllocator
56  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
57 
58  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
59  }
60  };
61 
63  template <typename TElem, typename TDim, typename TIdx>
64  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
65  template <typename TExtent>
66  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
67  alpaka::QueueCudaRtNonBlocking queue,
68  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
69  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
70 
71  auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
72 
73  // FIXME the BufCpu does not support a pitch ?
74  size_t size = alpaka::getExtentProduct(extent);
75  size_t sizeBytes = size * sizeof(TElem);
76  void* memPtr = allocator.allocate(sizeBytes, queue);
77 
78  // use a custom deleter to return the buffer to the CachingAllocator
79  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
80 
81  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
82  }
83  };
84 
86  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
87  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
88  template <typename TExtent>
89  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
90  -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
91  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
92 
93  auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
94 
95  size_t width = alpaka::getWidth(extent);
96  size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
97  // TODO implement pitch for TDim > 1
98  size_t pitchBytes = widthBytes;
99  size_t size = alpaka::getExtentProduct(extent);
100  size_t sizeBytes = size * sizeof(TElem);
101  void* memPtr = allocator.allocate(sizeBytes, queue);
102 
103  // use a custom deleter to return the buffer to the CachingAllocator
104  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
105 
106  return alpaka::BufCudaRt<TElem, TDim, TIdx>(
107  dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
108  }
109  };
110 
111 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED
112 
113 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
114 
116  template <typename TElem, typename TDim, typename TIdx>
117  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
118  template <typename TExtent>
119  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
120  alpaka::QueueHipRtBlocking queue,
121  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
122  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
123 
124  auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
125 
126  // FIXME the BufCpu does not support a pitch ?
127  size_t size = alpaka::getExtentProduct(extent);
128  size_t sizeBytes = size * sizeof(TElem);
129  void* memPtr = allocator.allocate(sizeBytes, queue);
130 
131  // use a custom deleter to return the buffer to the CachingAllocator
132  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
133 
134  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
135  }
136  };
137 
139  template <typename TElem, typename TDim, typename TIdx>
140  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
141  template <typename TExtent>
142  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
143  alpaka::QueueHipRtNonBlocking queue,
144  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
145  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
146 
147  auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
148 
149  // FIXME the BufCpu does not support a pitch ?
150  size_t size = alpaka::getExtentProduct(extent);
151  size_t sizeBytes = size * sizeof(TElem);
152  void* memPtr = allocator.allocate(sizeBytes, queue);
153 
154  // use a custom deleter to return the buffer to the CachingAllocator
155  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
156 
157  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
158  }
159  };
160 
162  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
163  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
164  template <typename TExtent>
165  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
166  -> alpaka::BufHipRt<TElem, TDim, TIdx> {
167  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
168 
169  auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
170 
171  size_t width = alpaka::getWidth(extent);
172  size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
173  // TODO implement pitch for TDim > 1
174  size_t pitchBytes = widthBytes;
175  size_t size = alpaka::getExtentProduct(extent);
176  size_t sizeBytes = size * sizeof(TElem);
177  void* memPtr = allocator.allocate(sizeBytes, queue);
178 
179  // use a custom deleter to return the buffer to the CachingAllocator
180  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
181 
182  return alpaka::BufHipRt<TElem, TDim, TIdx>(
183  dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
184  }
185  };
186 
187 #endif // ALPAKA_ACC_GPU_HIP_ENABLED
188 
189  } // namespace traits
190 
191  template <typename TElem,
192  typename TIdx,
193  typename TExtent,
194  typename TQueue,
195  typename TDev,
196  typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
197  ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
198  return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
199  }
200 
201 } // namespace cms::alpakatools
202 
203 #endif // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
size
Write out results.
ALPAKA_FN_HOST auto allocCachedBuf(TDev const &dev, TQueue queue, TExtent const &extent=TExtent())
TEMPL(T2) struct Divides void
Definition: Factorize.h:24
The caching memory allocator trait.
def move(src, dest)
Definition: eostools.py:511
static ALPAKA_FN_HOST auto allocCachedBuf(alpaka::DevCpu const &dev, TQueue queue, TExtent const &extent) -> alpaka::BufCpu< TElem, TDim, TIdx >