CMS 3D CMS Logo

cudaCompat.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
2 #define HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
3 
4 /*
5  * Everything you need to run cuda code in plain sequential c++ code
6  */
7 
8 #ifndef __CUDACC__
9 
10 #include <algorithm>
11 #include <cstdint>
12 #include <cstring>
13 
14 // include the CUDA runtime header to define some of the attributes, types and symbols also on the CPU
15 #include <cuda_runtime.h>
16 
17 // make sure functions are inlined to avoid multiple definitions
18 #undef __global__
19 #define __global__ inline __attribute__((always_inline))
20 
21 #undef __forceinline__
22 #define __forceinline__ inline __attribute__((always_inline))
23 
24 namespace cms {
25  namespace cudacompat {
26 
27  // run serially with a single thread
28  // 1-dimensional block
29  const dim3 threadIdx = {0, 0, 0};
30  const dim3 blockDim = {1, 1, 1};
31  // 1-dimensional grid
32  const dim3 blockIdx = {0, 0, 0};
33  const dim3 gridDim = {1, 1, 1};
34 
// Sequential stand-in for CUDA's atomicCAS (compare-and-swap).
//
// Reads the value stored at `address`; if it equals `compare`, stores `val`
// (converted to T1) there, otherwise leaves the location untouched.
// Returns the value that was stored at `address` before the call.
//
// The store happens only on a match. Selecting between `val` and the old
// value with a conditional expression would first convert both operands to
// the common type of T1 and T2, so a non-matching call could silently alter
// `*address` (e.g. a large int round-tripped through float).
//
// This is a plain read-modify-write with no actual atomicity.
template <typename T1, typename T2>
T1 atomicCAS(T1* address, T1 compare, T2 val) {
  const T1 old = *address;
  if (old == compare) {
    *address = static_cast<T1>(val);
  }
  return old;
}
41 
// Sequential stand-in for CUDA's atomicInc.
//
// Reads the value stored at `a` and replaces it with
//   (old >= b) ? 0 : old + 1
// i.e. the counter is incremented until it reaches the limit `b` and then
// wraps around to 0, as the CUDA intrinsic does.
// Returns the value that was stored at `a` before the call.
//
// This is a plain read-modify-write with no actual atomicity.
template <typename T1, typename T2>
T1 atomicInc(T1* a, T2 b) {
  const T1 old = *a;
  const T1 limit = static_cast<T1>(b);
  // wrap to zero once the limit has been reached, otherwise count up
  *a = (old >= limit) ? T1(0) : static_cast<T1>(old + 1);
  return old;
}
49 
// Sequential stand-in for CUDA's atomicAdd: adds `b` to the value stored at
// `a` and returns the value that was stored there before the addition.
// This is a plain read-modify-write with no actual atomicity.
template <typename T1, typename T2>
T1 atomicAdd(T1* a, T2 b) {
  T1& target = *a;
  const T1 before = target;
  target += b;
  return before;
}
56 
// Sequential stand-in for CUDA's atomicSub: subtracts `b` from the value
// stored at `a` and returns the value that was stored there before the
// subtraction.
// This is a plain read-modify-write with no actual atomicity.
template <typename T1, typename T2>
T1 atomicSub(T1* a, T2 b) {
  T1& target = *a;
  const T1 before = target;
  target -= b;
  return before;
}
63 
// Sequential stand-in for CUDA's atomicMin: the value stored at `a` becomes
// the smaller of itself and `b` (converted to T1).
// Returns the value that was stored at `a` before the call.
// This is a plain read-modify-write with no actual atomicity.
template <typename T1, typename T2>
T1 atomicMin(T1* a, T2 b) {
  const T1 before = *a;
  const T1 candidate = T1(b);
  // only a strictly smaller candidate replaces the stored value
  if (candidate < before) {
    *a = candidate;
  }
  return before;
}
// Sequential stand-in for CUDA's atomicMax: the value stored at `a` becomes
// the larger of itself and `b` (converted to T1).
// Returns the value that was stored at `a` before the call.
// This is a plain read-modify-write with no actual atomicity.
template <typename T1, typename T2>
T1 atomicMax(T1* a, T2 b) {
  const T1 before = *a;
  const T1 candidate = T1(b);
  // only a strictly larger candidate replaces the stored value
  if (before < candidate) {
    *a = candidate;
  }
  return before;
}
76 
// Stand-in for the CUDA block barrier: does nothing.
inline void __syncthreads() {
}

// Stand-in for the CUDA memory fence: does nothing.
inline void __threadfence() {
}
// Stand-in for the CUDA barrier with OR-reduction of the predicate:
// simply hands back the predicate it was given.
inline bool __syncthreads_or(bool x) {
  return x;
}

// Stand-in for the CUDA barrier with AND-reduction of the predicate:
// simply hands back the predicate it was given.
inline bool __syncthreads_and(bool x) {
  return x;
}
// Stand-in for the CUDA __ldg load intrinsic: returns a copy of the object
// that `x` points to.
template <typename T>
inline T __ldg(const T* x) {
  return x[0];
}
85 
86  } // namespace cudacompat
87 } // namespace cms
88 
89 // make the cudacompat implementation available in the global namespace
90 using namespace cms::cudacompat;
91 
92 #endif // __CUDACC__
93 
94 #endif // HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
runTheMatrix.ret
ret
prodAgent to be discontinued
Definition: runTheMatrix.py:542
cms::cudacompat::atomicSub
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:58
cms::cudacompat
Definition: HeterogeneousSoA.h:54
min
T min(T a, T b)
Definition: MathUtil.h:58
cms::cudacompat::__threadfence
void __threadfence()
Definition: cudaCompat.h:78
cms::cudacompat::__syncthreads
void __syncthreads()
Definition: cudaCompat.h:77
cms::cudacompat::__syncthreads_or
bool __syncthreads_or(bool x)
Definition: cudaCompat.h:79
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:51
b
double b
Definition: hdecay.h:118
cms::cudacompat::gridDim
const dim3 gridDim
Definition: cudaCompat.h:33
a
double a
Definition: hdecay.h:119
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:30
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
cms::cudacompat::atomicMax
T1 atomicMax(T1 *a, T2 b)
Definition: cudaCompat.h:71
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:29
heppy_batch.val
val
Definition: heppy_batch.py:351
cms::cudacompat::atomicCAS
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
T
long double T
Definition: Basic3DVectorLD.h:48
cms::cudacompat::atomicMin
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:65
cms::cudacompat::__syncthreads_and
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:80
compare
Definition: compare.py:1
cms::cudacompat::__ldg
T __ldg(T const *x)
Definition: cudaCompat.h:82
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21
cms::cudacompat::atomicInc
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:43
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition: cudaCompat.h:32