CMS 3D CMS Logo

cudaCompat.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
2 #define HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
3 
4 /*
5  * Everything you need to run cuda code in plain sequential c++ code
6  */
7 
8 #ifndef __CUDACC__
9 
10 #include <algorithm>
11 #include <cstdint>
12 #include <cstring>
13 
14 #include <cuda_runtime.h>
15 
16 namespace cms {
17  namespace cudacompat {
18 
19 #ifndef __CUDA_RUNTIME_H__
20  struct dim3 {
21  uint32_t x, y, z;
22  };
23 #endif
24  const dim3 threadIdx = {0, 0, 0};
25  const dim3 blockDim = {1, 1, 1};
26 
27  extern thread_local dim3 blockIdx;
28  extern thread_local dim3 gridDim;
29 
30  template <typename T1, typename T2>
31  T1 atomicCAS(T1* address, T1 compare, T2 val) {
32  T1 old = *address;
33  *address = old == compare ? val : old;
34  return old;
35  }
36 
37  template <typename T1, typename T2>
38  T1 atomicInc(T1* a, T2 b) {
39  auto ret = *a;
40  if ((*a) < T1(b))
41  (*a)++;
42  return ret;
43  }
44 
45  template <typename T1, typename T2>
46  T1 atomicAdd(T1* a, T2 b) {
47  auto ret = *a;
48  (*a) += b;
49  return ret;
50  }
51 
52  template <typename T1, typename T2>
53  T1 atomicSub(T1* a, T2 b) {
54  auto ret = *a;
55  (*a) -= b;
56  return ret;
57  }
58 
59  template <typename T1, typename T2>
60  T1 atomicMin(T1* a, T2 b) {
61  auto ret = *a;
62  *a = std::min(*a, T1(b));
63  return ret;
64  }
65  template <typename T1, typename T2>
66  T1 atomicMax(T1* a, T2 b) {
67  auto ret = *a;
68  *a = std::max(*a, T1(b));
69  return ret;
70  }
71 
72  inline void __syncthreads() {}
73  inline void __threadfence() {}
74  inline bool __syncthreads_or(bool x) { return x; }
75  inline bool __syncthreads_and(bool x) { return x; }
76  template <typename T>
77  inline T __ldg(T const* x) {
78  return *x;
79  }
80 
81  inline void resetGrid() {
82  blockIdx = {0, 0, 0};
83  gridDim = {1, 1, 1};
84  }
85 
86  } // namespace cudacompat
87 } // namespace cms
88 
89 // some not needed as done by cuda runtime...
90 #ifndef __CUDA_RUNTIME_H__
91 #define __host__
92 #define __device__
93 #define __global__
94 #define __shared__
95 #define __forceinline__
96 #endif
97 
98 // make sure function are inlined to avoid multiple definition
99 #ifndef __CUDA_ARCH__
100 #undef __global__
101 #define __global__ inline __attribute__((always_inline))
102 #undef __forceinline__
103 #define __forceinline__ inline __attribute__((always_inline))
104 #endif
105 
106 #ifndef __CUDA_ARCH__
107 using namespace cms::cudacompat;
108 #endif
109 
110 #endif
111 
112 #endif // HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
runTheMatrix.ret
ret
prodAgent to be discontinued
Definition: runTheMatrix.py:373
cms::cudacompat::atomicSub
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:53
cms::cudacompat
Definition: HeterogeneousSoA.h:54
min
T min(T a, T b)
Definition: MathUtil.h:58
cms::cudacompat::__threadfence
void __threadfence()
Definition: cudaCompat.h:73
cms::cudacompat::dim3::y
uint32_t y
Definition: cudaCompat.h:21
cms::cudacompat::gridDim
thread_local dim3 gridDim
Definition: cudaCompat.cc:6
cms::cudacompat::__syncthreads_or
bool __syncthreads_or(bool x)
Definition: cudaCompat.h:74
cms::cudacompat::dim3::z
uint32_t z
Definition: cudaCompat.h:21
cms::cudacompat::atomicAdd
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:46
b
double b
Definition: hdecay.h:118
cms::cudacompat::dim3
Definition: cudaCompat.h:20
cms::cudacompat::resetGrid
void resetGrid()
Definition: cudaCompat.h:81
a
double a
Definition: hdecay.h:119
cms::cudacompat::blockDim
const dim3 blockDim
Definition: cudaCompat.h:25
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
cms::cudacompat::atomicMax
T1 atomicMax(T1 *a, T2 b)
Definition: cudaCompat.h:66
cms::cudacompat::dim3::x
uint32_t x
Definition: cudaCompat.h:21
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition: cudaCompat.h:24
heppy_batch.val
val
Definition: heppy_batch.py:351
cms::cudacompat::atomicCAS
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:31
T
long double T
Definition: Basic3DVectorLD.h:48
cms::cudacompat::atomicMin
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:60
cms::cudacompat::__syncthreads_and
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:75
compare
Definition: compare.py:1
cms::cudacompat::__ldg
T __ldg(T const *x)
Definition: cudaCompat.h:77
cms::cudacompat::__syncthreads
void __syncthreads()
Definition: cudaCompat.h:72
cms
Namespace of DDCMS conversion namespace.
Definition: ProducerAnalyzer.cc:21
cms::cudacompat::atomicInc
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:38
cms::cudacompat::blockIdx
thread_local dim3 blockIdx
Definition: cudaCompat.cc:5