CMS 3D CMS Logo

launch.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_launch_h
2 #define HeterogeneousCore_CUDAUtilities_launch_h
3 
4 #include <tuple>
5 
6 #include <cuda_runtime.h>
7 
9 
10 /*
11  * `cms::cuda::launch` and `cms::cuda::launch_cooperative` are wrappers around
12  * the CUDA Runtime API calls to setup and call a CUDA kernel from the host.
13  *
14  * `kernel` should be a pointer to a __global__ void(...) function.
15  * `config` describe the launch configuration: the grid size and block size, the
16  * dynamic shared memory size (default to 0) and the CUDA stream to use
17  * (default to 0, the default stream).
18  * `args` are the arguments passed (by value) to the kernel.
19  *
20  * Currently this is requires an extra copy to perform the necessary implicit
21  * conversions and ensure that the arguments match the kernel function signature;
22  * the extra copy could eventually be avoided for arguments that are already of
23  * the exact type.
24  *
25  * Unlike the `kernel<<<...>>>(...)` syntax and the `cuda::launch(...)`
26  * implementation from the CUDA API Wrappers, `cms::cuda::launch(...)` and
27  * `cms::cuda::launch_cooperative` can be called from standard C++ host code.
28  *
29  * Possible optimisations
30  *
31  * - once C++17 is available in CUDA, replace the `pointer_setter` functor
32  * with a simpler function using fold expressions:
33  *
34  * template<int N, class Tuple, std::size_t... Is>
35  * void pointer_setter(void* ptrs[N], Tuple const& t, std::index_sequence<Is...>)
36  * {
37  * ((ptrs[Is] = & std::get<Is>(t)), ...);
38  * }
39  *
40  * - add a template specialisation to `launch` and `launch_cooperative` to
41  * avoid making a temporary copy of the parameters when they match the
42  * kernel signature.
43  */
44 
45 namespace cms {
46  namespace cuda {
47 
49  dim3 gridDim;
50  dim3 blockDim;
51  size_t sharedMem;
52  cudaStream_t stream;
53 
54  LaunchParameters(dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, cudaStream_t stream = nullptr)
56 
57  LaunchParameters(int gridDim, int blockDim, size_t sharedMem = 0, cudaStream_t stream = nullptr)
59  };
60 
61  namespace detail {
62 
63  template <typename T>
64  struct kernel_traits;
65 
66  template <typename... Args>
67  struct kernel_traits<void(Args...)> {
68  static constexpr size_t arguments_size = sizeof...(Args);
69 
70  using argument_type_tuple = std::tuple<Args...>;
71 
72  template <size_t i>
74  };
75 
76  // fill an array with the pointers to the elements of a tuple
77  template <int I>
78  struct pointer_setter {
79  template <typename Tuple>
80  void operator()(void const* ptrs[], Tuple const& t) {
81  pointer_setter<I - 1>()(ptrs, t);
82  ptrs[I - 1] = &std::get<I - 1>(t);
83  }
84  };
85 
86  template <>
87  struct pointer_setter<0> {
88  template <typename Tuple>
89  void operator()(void const* ptrs[], Tuple const& t) {}
90  };
91 
92  } // namespace detail
93 
94  // wrappers for cudaLaunchKernel
95 
96  inline void launch(void (*kernel)(), LaunchParameters config) {
97  cudaCheck(cudaLaunchKernel(
98  (const void*)kernel, config.gridDim, config.blockDim, nullptr, config.sharedMem, config.stream));
99  }
100 
101  template <typename F, typename... Args>
102 #if __cplusplus >= 201703L
103  std::enable_if_t<std::is_invocable_r<void, F, Args&&...>::value>
104 #else
105  std::enable_if_t<std::is_void<std::result_of_t<F && (Args && ...)> >::value>
106 #endif
107  launch(F* kernel, LaunchParameters config, Args&&... args) {
108  using function_type = detail::kernel_traits<F>;
109  typename function_type::argument_type_tuple args_copy(args...);
110 
111  constexpr auto size = function_type::arguments_size;
112  void const* pointers[size];
113 
114  detail::pointer_setter<size>()(pointers, args_copy);
115  cudaCheck(cudaLaunchKernel(
116  (const void*)kernel, config.gridDim, config.blockDim, (void**)pointers, config.sharedMem, config.stream));
117  }
118 
119  // wrappers for cudaLaunchCooperativeKernel
120 
121  inline void launch_cooperative(void (*kernel)(), LaunchParameters config) {
122  cudaCheck(cudaLaunchCooperativeKernel(
123  (const void*)kernel, config.gridDim, config.blockDim, nullptr, config.sharedMem, config.stream));
124  }
125 
126  template <typename F, typename... Args>
127 #if __cplusplus >= 201703L
128  std::enable_if_t<std::is_invocable_r<void, F, Args&&...>::value>
129 #else
130  std::enable_if_t<std::is_void<std::result_of_t<F && (Args && ...)> >::value>
131 #endif
133  using function_type = detail::kernel_traits<F>;
134  typename function_type::argument_type_tuple args_copy(args...);
135 
136  constexpr auto size = function_type::arguments_size;
137  void const* pointers[size];
138 
139  detail::pointer_setter<size>()(pointers, args_copy);
140  cudaCheck(cudaLaunchCooperativeKernel(
141  (const void*)kernel, config.gridDim, config.blockDim, (void**)pointers, config.sharedMem, config.stream));
142  }
143 
144  } // namespace cuda
145 } // namespace cms
146 
147 #endif // HeterogeneousCore_CUDAUtilities_launch_h
writedatasetfile.args
args
Definition: writedatasetfile.py:18
cms::cuda::launch_cooperative
void launch_cooperative(void(*kernel)(), LaunchParameters config)
Definition: launch.h:121
cms::cuda::LaunchParameters::stream
cudaStream_t stream
Definition: launch.h:52
cms::cuda::LaunchParameters::gridDim
dim3 gridDim
Definition: launch.h:49
detail
Definition: ConvertingESProducerWithDependenciesT.h:23
F
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
cms::cuda::LaunchParameters::LaunchParameters
LaunchParameters(int gridDim, int blockDim, size_t sharedMem=0, cudaStream_t stream=nullptr)
Definition: launch.h:57
config
Definition: config.py:1
Exhume::I
const std::complex< double > I
Definition: I.h:8
cms::cuda::launch
void launch(void(*kernel)(), LaunchParameters config)
Definition: launch.h:96
cms::cuda::value
cudaStream_t T uint32_t const T *__restrict__ const uint32_t *__restrict__ uint32_t int cudaStream_t Func __host__ __device__ V value
Definition: HistoContainer.h:124
cms::cuda::LaunchParameters::blockDim
dim3 blockDim
Definition: launch.h:50
cms::cuda::detail::pointer_setter
Definition: launch.h:78
cms::cuda::LaunchParameters::sharedMem
size_t sharedMem
Definition: launch.h:51
cms::cuda::detail::pointer_setter< 0 >::operator()
void operator()(void const *ptrs[], Tuple const &t)
Definition: launch.h:89
gainCalibHelper::gainCalibPI::type
type
Definition: SiPixelGainCalibHelper.h:40
cms::cuda::LaunchParameters
Definition: launch.h:48
cudaCheck.h
get
#define get
cms::cuda::detail::kernel_traits
Definition: launch.h:64
cms::cuda::LaunchParameters::LaunchParameters
LaunchParameters(dim3 gridDim, dim3 blockDim, size_t sharedMem=0, cudaStream_t stream=nullptr)
Definition: launch.h:54
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
cms::cuda::detail::kernel_traits< void(Args...)>::argument_type
typename std::tuple_element< i, argument_type_tuple >::type argument_type
Definition: launch.h:73
cms::cuda::size
__host__ __device__ VT uint32_t size
Definition: prefixScan.h:47
ecalDigis_cff.cuda
cuda
Definition: ecalDigis_cff.py:35
funct::void
TEMPL(T2) struct Divides void
Definition: Factorize.h:24
cms::cuda::detail::kernel_traits< void(Args...)>::argument_type_tuple
std::tuple< Args... > argument_type_tuple
Definition: launch.h:70
cms::cuda::detail::pointer_setter::operator()
void operator()(void const *ptrs[], Tuple const &t)
Definition: launch.h:80
submitPVValidationJobs.t
string t
Definition: submitPVValidationJobs.py:644
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21