CMS 3D CMS Logo

getCachingDeviceAllocator.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDACore_src_getCachingDeviceAllocator
2 #define HeterogeneousCore_CUDACore_src_getCachingDeviceAllocator
3 
9 
10 #include <iomanip>
11 
12 namespace cms::cuda::allocator {
13  // Use caching or not
14  constexpr bool useCaching = true;
15  // Growth factor (bin_growth in cub::CachingDeviceAllocator)
16  constexpr unsigned int binGrowth = 2;
17  // Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator)
18  constexpr unsigned int minBin = 8;
19  // Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail.
20  constexpr unsigned int maxBin = 30;
21  // Total storage for the allocator. 0 means no limit.
22  constexpr size_t maxCachedBytes = 0;
23  // Fraction of the device memory reported as free by cudaMemGetInfo that is taken for the allocator. In case there are multiple devices with different amounts of free memory, the smallest resulting value is taken. If maxCachedBytes is also non-zero, the smaller of the two limits is taken.
24  constexpr double maxCachedFraction = 0.8;
25  constexpr bool debug = false;
26 
27  inline size_t minCachedBytes() {
29  int currentDevice;
30  cudaCheck(cudaGetDevice(&currentDevice));
31  const int numberOfDevices = deviceCount();
32  for (int i = 0; i < numberOfDevices; ++i) {
33  size_t freeMemory, totalMemory;
34  cudaCheck(cudaSetDevice(i));
35  cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
36  ret = std::min(ret, static_cast<size_t>(maxCachedFraction * freeMemory));
37  }
38  cudaCheck(cudaSetDevice(currentDevice));
39  if (maxCachedBytes > 0) {
41  }
42  return ret;
43  }
44 
46  LogDebug("CachingDeviceAllocator").log([](auto& log) {
47  log << "cub::CachingDeviceAllocator settings\n"
48  << " bin growth " << binGrowth << "\n"
49  << " min bin " << minBin << "\n"
50  << " max bin " << maxBin << "\n"
51  << " resulting bins:\n";
52  for (auto bin = minBin; bin <= maxBin; ++bin) {
54  if (binSize >= (1 << 30) and binSize % (1 << 30) == 0) {
55  log << " " << std::setw(8) << (binSize >> 30) << " GB\n";
56  } else if (binSize >= (1 << 20) and binSize % (1 << 20) == 0) {
57  log << " " << std::setw(8) << (binSize >> 20) << " MB\n";
58  } else if (binSize >= (1 << 10) and binSize % (1 << 10) == 0) {
59  log << " " << std::setw(8) << (binSize >> 10) << " kB\n";
60  } else {
61  log << " " << std::setw(9) << binSize << " B\n";
62  }
63  }
64  log << " maximum amount of cached memory: " << (minCachedBytes() >> 20) << " MB\n";
65  });
66 
67  // the public interface is thread safe
69  minBin,
70  maxBin,
72  false, // do not skip cleanup
73  debug};
74  return allocator;
75  }
76 } // namespace cms::cuda::allocator
77 
78 #endif
runTheMatrix.ret
ret
prodAgent to be discontinued
Definition: runTheMatrix.py:373
cms::cuda::allocator::minCachedBytes
size_t minCachedBytes()
Definition: getCachingDeviceAllocator.h:27
cms::cuda::allocator::minBin
constexpr unsigned int minBin
Definition: getCachingDeviceAllocator.h:18
mps_fire.i
i
Definition: mps_fire.py:428
MessageLogger.h
cms::cuda::allocator::useCaching
constexpr bool useCaching
Definition: getCachingDeviceAllocator.h:14
min
T min(T a, T b)
Definition: MathUtil.h:58
notcub::CachingDeviceAllocator
A simple caching allocator for device memory allocations.
Definition: CachingDeviceAllocator.h:124
deviceCount.h
cms::cuda::allocator::maxCachedBytes
constexpr size_t maxCachedBytes
Definition: getCachingDeviceAllocator.h:22
cms::cuda::numberOfDevices
int numberOfDevices()
Definition: numberOfDevices.cc:6
notcub::CachingDeviceAllocator::IntPow
static unsigned int IntPow(unsigned int base, unsigned int exp)
Definition: CachingDeviceAllocator.h:216
CMS_THREAD_SAFE
#define CMS_THREAD_SAFE
Definition: thread_safety_macros.h:4
cms::cuda::currentDevice
int currentDevice()
Definition: currentDevice.h:10
LogDebug
#define LogDebug(id)
Definition: MessageLogger.h:223
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
thread_safety_macros.h
cms::cuda::deviceCount
int deviceCount()
Definition: deviceCount.h:10
cms::cuda::allocator::maxBin
constexpr unsigned int maxBin
Definition: getCachingDeviceAllocator.h:20
CachingDeviceAllocator.h
cudaCheck.h
cms::cuda::allocator
Definition: deviceAllocatorStatus.h:8
newFWLiteAna.bin
bin
Definition: newFWLiteAna.py:161
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
cms::cuda::allocator::maxCachedFraction
constexpr double maxCachedFraction
Definition: getCachingDeviceAllocator.h:24
dqm-mbProfile.log
log
Definition: dqm-mbProfile.py:17
cms::cuda::allocator::debug
constexpr bool debug
Definition: getCachingDeviceAllocator.h:25
cms::cuda::allocator::binGrowth
constexpr unsigned int binGrowth
Definition: getCachingDeviceAllocator.h:16
cms::cuda::allocator::getCachingDeviceAllocator
notcub::CachingDeviceAllocator & getCachingDeviceAllocator()
Definition: getCachingDeviceAllocator.h:45