CMSSW Reference Manual
CachingDeviceAllocator.h
Go to the documentation of this file.
1 #ifndef HeterogenousCore_CUDAUtilities_src_CachingDeviceAllocator_h
2 #define HeterogenousCore_CUDAUtilities_src_CachingDeviceAllocator_h
3 
4 /******************************************************************************
5  * Copyright (c) 2011, Duane Merrill. All rights reserved.
6  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  * * Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * * Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  * * Neither the name of the NVIDIA CORPORATION nor the
16  * names of its contributors may be used to endorse or promote products
17  * derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
23  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  ******************************************************************************/
31 
36 /******************************************************************************
37  * Simple caching allocator for device memory allocations. The allocator is
38  * thread-safe and capable of managing device allocations on multiple devices.
39  ******************************************************************************/
40 
#include <cmath>
#include <map>
#include <set>
#include <mutex>

#include <cuda_runtime.h>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
namespace notcub {

  /******************************************************************************
   * CachingDeviceAllocator (host use)
   ******************************************************************************/

  /**
   * \brief A simple caching allocator for device memory allocations.
   *
   * The allocator is thread-safe and capable of managing device allocations on
   * multiple devices.  Requests are rounded up to bin sizes that grow
   * geometrically with factor \p bin_growth.  Freed blocks whose bin is valid
   * are cached for reuse (up to \p max_cached_bytes per device) instead of
   * being returned to the CUDA runtime.  A cached block may be reused
   * immediately by the stream it was last associated with, or by any stream
   * once the block's \p ready_event has been reached.
   */
  struct CachingDeviceAllocator {
    //---------------------------------------------------------------------
    // Constants
    //---------------------------------------------------------------------

    /// Out-of-bounds bin
    static const unsigned int INVALID_BIN = (unsigned int)-1;

    /// Invalid size
    static const size_t INVALID_SIZE = (size_t)-1;

#ifndef DOXYGEN_SHOULD_SKIP_THIS  // Do not document

    /// Invalid device ordinal
    static const int INVALID_DEVICE_ORDINAL = -1;

    //---------------------------------------------------------------------
    // Type definitions and helper types
    //---------------------------------------------------------------------

    /**
     * Descriptor for device memory allocations
     */
    struct BlockDescriptor {
      void *d_ptr;                     // Device pointer
      size_t bytes;                    // Size of allocation in bytes
      unsigned int bin;                // Bin enumeration
      int device;                      // device ordinal
      cudaStream_t associated_stream;  // Associated associated_stream
      cudaEvent_t ready_event;  // Signal when associated stream has run to the point at which this block was freed

      // Constructor (suitable for searching maps for a specific block, given its pointer and device)
      BlockDescriptor(void *d_ptr, int device)
          : d_ptr(d_ptr), bytes(0), bin(INVALID_BIN), device(device), associated_stream(nullptr), ready_event(nullptr) {}

      // Constructor (suitable for searching maps for a range of suitable blocks, given a device)
      BlockDescriptor(int device)
          : d_ptr(nullptr),
            bytes(0),
            bin(INVALID_BIN),
            device(device),
            associated_stream(nullptr),
            ready_event(nullptr) {}

      // Comparison functor for comparing device pointers
      static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b) {
        if (a.device == b.device)
          return (a.d_ptr < b.d_ptr);
        else
          return (a.device < b.device);
      }

      // Comparison functor for comparing allocation sizes
      static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b) {
        if (a.device == b.device)
          return (a.bytes < b.bytes);
        else
          return (a.device < b.device);
      }
    };

    /// BlockDescriptor comparator function interface
    typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &);

    /// Per-device tallies of cached (reusable) and live (outstanding) bytes
    class TotalBytes {
    public:
      size_t free;
      size_t live;
      TotalBytes() { free = live = 0; }
    };

    /// Set type for cached blocks (ordered by size)
    typedef std::multiset<BlockDescriptor, Compare> CachedBlocks;

    /// Set type for live blocks (ordered by ptr)
    typedef std::multiset<BlockDescriptor, Compare> BusyBlocks;

    /// Map type of device ordinals to the number of cached bytes cached by each device
    typedef std::map<int, TotalBytes> GpuCachedBytes;

    //---------------------------------------------------------------------
    // Utility functions
    //---------------------------------------------------------------------

    /**
     * Integer pow function for unsigned base and exponent (exponentiation by squaring)
     */
    static unsigned int IntPow(unsigned int base, unsigned int exp) {
      unsigned int retval = 1;
      while (exp > 0) {
        if (exp & 1) {
          retval = retval * base;  // multiply the result by the current base
        }
        base = base * base;  // square the base
        exp = exp >> 1;      // divide the exponent in half
      }
      return retval;
    }

    /**
     * Round up \p value to the nearest power of \p base, returning the power
     * (bin index) and the rounded-up byte count.  Saturates both outputs on
     * multiplication overflow.
     */
    void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value) {
      power = 0;
      rounded_bytes = 1;

      if (value * base < value) {
        // Overflow
        power = sizeof(size_t) * 8;
        rounded_bytes = size_t(0) - 1;
        return;
      }

      while (rounded_bytes < value) {
        rounded_bytes *= base;
        power++;
      }
    }

    //---------------------------------------------------------------------
    // Fields
    //---------------------------------------------------------------------

    // CMS: use std::mutex instead of cub::Mutex
    std::mutex mutex;  ///< Mutex for thread-safety

    unsigned int bin_growth;  ///< Geometric growth factor for bin-sizes
    unsigned int min_bin;     ///< Minimum bin enumeration
    unsigned int max_bin;     ///< Maximum bin enumeration

    size_t min_bin_bytes;     ///< Minimum bin size
    size_t max_bin_bytes;     ///< Maximum bin size
    size_t max_cached_bytes;  ///< Maximum aggregate cached bytes per device

    const bool
        skip_cleanup;  ///< Whether or not to skip a call to FreeAllCached() when destructor is called.  (The CUDA runtime may have already shut down for statically declared allocators)
    bool debug;        ///< Whether or not to print (de)allocation events to stdout

    GpuCachedBytes cached_bytes;  ///< Map of device ordinal to aggregate cached bytes on that device
    CachedBlocks cached_blocks;   ///< Set of cached device allocations available for reuse
    BusyBlocks live_blocks;       ///< Set of live device allocations currently in use

#endif  // DOXYGEN_SHOULD_SKIP_THIS

    //---------------------------------------------------------------------
    // Methods
    //---------------------------------------------------------------------

    /**
     * \brief Constructor.
     */
    CachingDeviceAllocator(
        unsigned int bin_growth,                 ///< Geometric growth factor for bin-sizes
        unsigned int min_bin = 1,                ///< Minimum bin (default is bin_growth^1)
        unsigned int max_bin = INVALID_BIN,      ///< Maximum bin (default is no max bin)
        size_t max_cached_bytes = INVALID_SIZE,  ///< Maximum aggregate cached bytes per device (default is no limit)
        bool skip_cleanup =
            false,  ///< Whether or not to skip a call to FreeAllCached() when the destructor is called (default is to deallocate)
        bool debug = false)  ///< Whether or not to print (de)allocation events to stdout (default is no output)
        : bin_growth(bin_growth),
          min_bin(min_bin),
          max_bin(max_bin),
          min_bin_bytes(IntPow(bin_growth, min_bin)),
          max_bin_bytes(IntPow(bin_growth, max_bin)),
          max_cached_bytes(max_cached_bytes),
          skip_cleanup(skip_cleanup),
          debug(debug),
          cached_blocks(BlockDescriptor::SizeCompare),
          live_blocks(BlockDescriptor::PtrCompare) {}

    /**
     * \brief Default constructor.
     *
     * Configured with:
     * - \p bin_growth       = 8
     * - \p min_bin          = 3
     * - \p max_bin          = 7
     * - \p max_cached_bytes = (bin_growth ^ max_bin) * 3 - 1 = 6,291,455 bytes
     *
     * which delineates five bin-sizes (512B, 4KB, 32KB, 256KB, and 2MB) and
     * sets a maximum of 6,291,455 cached bytes per device.
     */
    CachingDeviceAllocator(bool skip_cleanup = false, bool debug = false)
        : bin_growth(8),
          min_bin(3),
          max_bin(7),
          min_bin_bytes(IntPow(bin_growth, min_bin)),
          max_bin_bytes(IntPow(bin_growth, max_bin)),
          max_cached_bytes((max_bin_bytes * 3) - 1),
          skip_cleanup(skip_cleanup),
          debug(debug),
          cached_blocks(BlockDescriptor::SizeCompare),
          live_blocks(BlockDescriptor::PtrCompare) {}

    /**
     * \brief Sets the limit on the number bytes this allocator is allowed to cache per device.
     *
     * Does not flush any currently-cached blocks; the new limit only affects
     * whether future frees are recached.
     */
    cudaError_t SetMaxCachedBytes(size_t max_cached_bytes) {
      // Lock
      mutex.lock();

      if (debug)
        // CMS: use raw printf
        printf("Changing max_cached_bytes (%lld -> %lld)\n",
               (long long)this->max_cached_bytes,
               (long long)max_cached_bytes);

      this->max_cached_bytes = max_cached_bytes;

      // Unlock
      mutex.unlock();

      return cudaSuccess;
    }

    /**
     * \brief Provides a suitable allocation of device memory for the given size on the specified device.
     *
     * Once freed, the allocation becomes available immediately for reuse within the
     * \p active_stream with which it was associated with during allocation, and it becomes
     * available for reuse within other streams when all prior work submitted to
     * \p active_stream has completed.
     */
    cudaError_t DeviceAllocate(
        int device,                            ///< [in] Device on which to place the allocation
        void **d_ptr,                          ///< [out] Reference to pointer to the allocation
        size_t bytes,                          ///< [in] Minimum number of bytes for the allocation
        cudaStream_t active_stream = nullptr)  ///< [in] The stream to be associated with this allocation
    {
      *d_ptr = nullptr;
      int entrypoint_device = INVALID_DEVICE_ORDINAL;
      cudaError_t error = cudaSuccess;

      if (device == INVALID_DEVICE_ORDINAL) {
        // CMS: throw exception on error
        cudaCheck(error = cudaGetDevice(&entrypoint_device));
        device = entrypoint_device;
      }

      // Create a block descriptor for the requested allocation
      bool found = false;
      BlockDescriptor search_key(device);
      search_key.associated_stream = active_stream;
      NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes);

      if (search_key.bin > max_bin) {
        // Bin is greater than our maximum bin: allocate the request
        // exactly and give out-of-bounds bin.  It will not be cached
        // for reuse when returned.
        search_key.bin = INVALID_BIN;
        search_key.bytes = bytes;
      } else {
        // Search for a suitable cached allocation: lock
        mutex.lock();

        if (search_key.bin < min_bin) {
          // Bin is less than minimum bin: round up
          search_key.bin = min_bin;
          search_key.bytes = min_bin_bytes;
        }

        // Iterate through the range of cached blocks on the same device in the same bin
        CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key);
        while ((block_itr != cached_blocks.end()) && (block_itr->device == device) &&
               (block_itr->bin == search_key.bin)) {
          // To prevent races with reusing blocks returned by the host but still
          // in use by the device, only consider cached blocks that are
          // either (from the active stream) or (from an idle stream)
          if ((active_stream == block_itr->associated_stream) ||
              (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady)) {
            // Reuse existing cache block.  Insert into live blocks.
            found = true;
            search_key = *block_itr;
            search_key.associated_stream = active_stream;
            live_blocks.insert(search_key);

            // Remove from free blocks
            cached_bytes[device].free -= search_key.bytes;
            cached_bytes[device].live += search_key.bytes;

            if (debug)
              // CMS: improved debug message
              // CMS: use raw printf
              printf(
                  "\tDevice %d reused cached block at %p (%lld bytes) for stream %lld, event %lld (previously "
                  "associated with stream %lld, event %lld).\n",
                  device,
                  search_key.d_ptr,
                  (long long)search_key.bytes,
                  (long long)search_key.associated_stream,
                  (long long)search_key.ready_event,
                  (long long)block_itr->associated_stream,
                  (long long)block_itr->ready_event);

            cached_blocks.erase(block_itr);

            break;
          }
          block_itr++;
        }

        // Done searching: unlock
        mutex.unlock();
      }

      // Allocate the block if necessary
      if (!found) {
        // Set runtime's current device to specified device (entrypoint may not be set)
        if (device != entrypoint_device) {
          // CMS: throw exception on error
          cudaCheck(error = cudaGetDevice(&entrypoint_device));
          cudaCheck(error = cudaSetDevice(device));
        }

        // Attempt to allocate
        // CMS: silently ignore errors and retry or pass them to the caller
        if ((error = cudaMalloc(&search_key.d_ptr, search_key.bytes)) == cudaErrorMemoryAllocation) {
          // The allocation attempt failed: free all cached blocks on device and retry
          if (debug)
            // CMS: use raw printf
            printf(
                "\tDevice %d failed to allocate %lld bytes for stream %lld, retrying after freeing cached allocations",
                device,
                (long long)search_key.bytes,
                (long long)search_key.associated_stream);

          error = cudaSuccess;  // Reset the error we will return
          cudaGetLastError();   // Reset CUDART's error

          // Lock
          mutex.lock();

          // Iterate the range of free blocks on the same device
          BlockDescriptor free_key(device);
          CachedBlocks::iterator block_itr = cached_blocks.lower_bound(free_key);

          while ((block_itr != cached_blocks.end()) && (block_itr->device == device)) {
            // No need to worry about synchronization with the device: cudaFree is
            // blocking and will synchronize across all kernels executing
            // on the current device

            // Free device memory and destroy stream event.
            // CMS: silently ignore errors and pass them to the caller
            if ((error = cudaFree(block_itr->d_ptr)))
              break;
            if ((error = cudaEventDestroy(block_itr->ready_event)))
              break;

            // Reduce balance and erase entry
            cached_bytes[device].free -= block_itr->bytes;

            if (debug)
              // CMS: use raw printf
              printf(
                  "\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks "
                  "(%lld bytes) outstanding.\n",
                  device,
                  (long long)block_itr->bytes,
                  (long long)cached_blocks.size(),
                  (long long)cached_bytes[device].free,
                  (long long)live_blocks.size(),
                  (long long)cached_bytes[device].live);

            // CMS fix: advance via the iterator returned by erase();
            // incrementing an iterator after erasing it is undefined behavior
            block_itr = cached_blocks.erase(block_itr);
          }

          // Unlock
          mutex.unlock();

          // Return under error
          if (error)
            return error;

          // Try to allocate again
          // CMS: throw exception on error
          cudaCheck(error = cudaMalloc(&search_key.d_ptr, search_key.bytes));
        }

        // Create ready event
        // CMS: throw exception on error
        cudaCheck(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));

        // Insert into live blocks
        mutex.lock();
        live_blocks.insert(search_key);
        cached_bytes[device].live += search_key.bytes;
        mutex.unlock();

        if (debug)
          // CMS: improved debug message
          // CMS: use raw printf
          printf("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld, event %lld).\n",
                 device,
                 search_key.d_ptr,
                 (long long)search_key.bytes,
                 (long long)search_key.associated_stream,
                 (long long)search_key.ready_event);

        // Attempt to revert back to previous device if necessary
        if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) {
          // CMS: throw exception on error
          cudaCheck(error = cudaSetDevice(entrypoint_device));
        }
      }

      // Copy device pointer to output parameter
      *d_ptr = search_key.d_ptr;

      if (debug)
        // CMS: use raw printf
        printf("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
               (long long)cached_blocks.size(),
               (long long)cached_bytes[device].free,
               (long long)live_blocks.size(),
               (long long)cached_bytes[device].live);

      return error;
    }

    /**
     * \brief Provides a suitable allocation of device memory for the given size on the current device.
     *
     * Convenience overload: forwards to the device-explicit overload using the
     * current device ordinal.
     */
    cudaError_t DeviceAllocate(
        void **d_ptr,                          ///< [out] Reference to pointer to the allocation
        size_t bytes,                          ///< [in] Minimum number of bytes for the allocation
        cudaStream_t active_stream = nullptr)  ///< [in] The stream to be associated with this allocation
    {
      return DeviceAllocate(INVALID_DEVICE_ORDINAL, d_ptr, bytes, active_stream);
    }

    /**
     * \brief Frees a live allocation of device memory on the specified device, returning it to the allocator.
     *
     * Once freed, the allocation becomes available immediately for reuse within the
     * stream it was associated with during allocation, and it becomes available for
     * reuse within other streams when all prior work submitted to that stream has
     * completed.  Blocks with an invalid bin, or that would push the cache above
     * \p max_cached_bytes, are released back to the CUDA runtime instead.
     */
    cudaError_t DeviceFree(int device, void *d_ptr) {
      int entrypoint_device = INVALID_DEVICE_ORDINAL;
      cudaError_t error = cudaSuccess;

      if (device == INVALID_DEVICE_ORDINAL) {
        // CMS: throw exception on error
        cudaCheck(error = cudaGetDevice(&entrypoint_device));
        device = entrypoint_device;
      }

      // Lock
      mutex.lock();

      // Find corresponding block descriptor
      bool recached = false;
      BlockDescriptor search_key(d_ptr, device);
      BusyBlocks::iterator block_itr = live_blocks.find(search_key);
      if (block_itr != live_blocks.end()) {
        // Remove from live blocks
        search_key = *block_itr;
        live_blocks.erase(block_itr);
        cached_bytes[device].live -= search_key.bytes;

        // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold
        if ((search_key.bin != INVALID_BIN) && (cached_bytes[device].free + search_key.bytes <= max_cached_bytes)) {
          // Insert returned allocation into free blocks
          recached = true;
          cached_blocks.insert(search_key);
          cached_bytes[device].free += search_key.bytes;

          if (debug)
            // CMS: improved debug message
            // CMS: use raw printf
            printf(
                "\tDevice %d returned %lld bytes at %p from associated stream %lld, event %lld.\n\t\t %lld available "
                "blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
                device,
                (long long)search_key.bytes,
                d_ptr,
                (long long)search_key.associated_stream,
                (long long)search_key.ready_event,
                (long long)cached_blocks.size(),
                (long long)cached_bytes[device].free,
                (long long)live_blocks.size(),
                (long long)cached_bytes[device].live);
        }
      }

      // First set to specified device (entrypoint may not be set)
      if (device != entrypoint_device) {
        // CMS: throw exception on error
        cudaCheck(error = cudaGetDevice(&entrypoint_device));
        cudaCheck(error = cudaSetDevice(device));
      }

      if (recached) {
        // Insert the ready event in the associated stream (must have current device set properly)
        // CMS: throw exception on error
        cudaCheck(error = cudaEventRecord(search_key.ready_event, search_key.associated_stream));
      }

      // Unlock
      mutex.unlock();

      if (!recached) {
        // Free the allocation from the runtime and cleanup the event.
        // CMS: throw exception on error
        cudaCheck(error = cudaFree(d_ptr));
        cudaCheck(error = cudaEventDestroy(search_key.ready_event));

        if (debug)
          // CMS: improved debug message
          printf(
              "\tDevice %d freed %lld bytes at %p from associated stream %lld, event %lld.\n\t\t %lld available "
              "blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
              device,
              (long long)search_key.bytes,
              d_ptr,
              (long long)search_key.associated_stream,
              (long long)search_key.ready_event,
              (long long)cached_blocks.size(),
              (long long)cached_bytes[device].free,
              (long long)live_blocks.size(),
              (long long)cached_bytes[device].live);
      }

      // Reset device
      if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) {
        // CMS: throw exception on error
        cudaCheck(error = cudaSetDevice(entrypoint_device));
      }

      return error;
    }

    /**
     * \brief Frees a live allocation of device memory on the current device, returning it to the allocator.
     *
     * Convenience overload: forwards to the device-explicit overload using the
     * current device ordinal.
     */
    cudaError_t DeviceFree(void *d_ptr) { return DeviceFree(INVALID_DEVICE_ORDINAL, d_ptr); }

    /**
     * \brief Frees all cached device allocations on all devices.
     */
    cudaError_t FreeAllCached() {
      cudaError_t error = cudaSuccess;
      int entrypoint_device = INVALID_DEVICE_ORDINAL;
      int current_device = INVALID_DEVICE_ORDINAL;

      mutex.lock();

      while (!cached_blocks.empty()) {
        // Get first block
        CachedBlocks::iterator begin = cached_blocks.begin();

        // Get entry-point device ordinal if necessary
        if (entrypoint_device == INVALID_DEVICE_ORDINAL) {
          // CMS: silently ignore errors and pass them to the caller
          if ((error = cudaGetDevice(&entrypoint_device)))
            break;
        }

        // Set current device ordinal if necessary
        if (begin->device != current_device) {
          // CMS: silently ignore errors and pass them to the caller
          if ((error = cudaSetDevice(begin->device)))
            break;
          current_device = begin->device;
        }

        // Free device memory
        // CMS: silently ignore errors and pass them to the caller
        if ((error = cudaFree(begin->d_ptr)))
          break;
        if ((error = cudaEventDestroy(begin->ready_event)))
          break;

        // Reduce balance and erase entry
        cached_bytes[current_device].free -= begin->bytes;

        if (debug)
          printf(
              "\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
              "bytes) outstanding.\n",
              current_device,
              (long long)begin->bytes,
              (long long)cached_blocks.size(),
              (long long)cached_bytes[current_device].free,
              (long long)live_blocks.size(),
              (long long)cached_bytes[current_device].live);

        cached_blocks.erase(begin);
      }

      mutex.unlock();

      // Attempt to revert back to entry-point device if necessary
      if (entrypoint_device != INVALID_DEVICE_ORDINAL) {
        // CMS: throw exception on error
        cudaCheck(error = cudaSetDevice(entrypoint_device));
      }

      return error;
    }

    /**
     * \brief Destructor
     */
    // CMS: make the destructor not virtual
    ~CachingDeviceAllocator() {
      if (!skip_cleanup)
        FreeAllCached();
    }
  };

  // end group UtilMgmt

}  // namespace notcub
726 
727 #endif
notcub::CachingDeviceAllocator::min_bin_bytes
size_t min_bin_bytes
Minimum bin size.
Definition: CachingDeviceAllocator.h:254
notcub::CachingDeviceAllocator::live_blocks
BusyBlocks live_blocks
Set of live device allocations currently in use.
Definition: CachingDeviceAllocator.h:264
electrons_cff.bool
bool
Definition: electrons_cff.py:372
notcub::CachingDeviceAllocator::DeviceFree
cudaError_t DeviceFree(int device, void *d_ptr)
Frees a live allocation of device memory on the specified device, returning it to the allocator.
Definition: CachingDeviceAllocator.h:570
notcub::CachingDeviceAllocator::BlockDescriptor::PtrCompare
static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b)
Definition: CachingDeviceAllocator.h:169
notcub::CachingDeviceAllocator::max_bin_bytes
size_t max_bin_bytes
Maximum bin size.
Definition: CachingDeviceAllocator.h:255
notcub::CachingDeviceAllocator::CachedBlocks
std::multiset< BlockDescriptor, Compare > CachedBlocks
Set type for cached blocks (ordered by size)
Definition: CachingDeviceAllocator.h:196
notcub::CachingDeviceAllocator::TotalBytes::live
size_t live
Definition: CachingDeviceAllocator.h:191
notcub::CachingDeviceAllocator::max_bin
unsigned int max_bin
Maximum bin enumeration.
Definition: CachingDeviceAllocator.h:252
notcub::CachingDeviceAllocator::INVALID_SIZE
static const size_t INVALID_SIZE
Invalid size.
Definition: CachingDeviceAllocator.h:133
notcub::CachingDeviceAllocator::min_bin
unsigned int min_bin
Minimum bin enumeration.
Definition: CachingDeviceAllocator.h:251
notcub::CachingDeviceAllocator::BlockDescriptor::d_ptr
void * d_ptr
Definition: CachingDeviceAllocator.h:148
notcub::CachingDeviceAllocator::mutex
std::mutex mutex
Definition: CachingDeviceAllocator.h:248
newFWLiteAna.found
found
Definition: newFWLiteAna.py:118
relativeConstraints.error
error
Definition: relativeConstraints.py:53
notcub::CachingDeviceAllocator::BlockDescriptor::bin
unsigned int bin
Definition: CachingDeviceAllocator.h:150
notcub
CUB namespace.
Definition: CachingDeviceAllocator.h:47
notcub::CachingDeviceAllocator::FreeAllCached
cudaError_t FreeAllCached()
Frees all cached device allocations on all devices.
Definition: CachingDeviceAllocator.h:677
notcub::CachingDeviceAllocator
A simple caching allocator for device memory allocations.
Definition: CachingDeviceAllocator.h:123
notcub::CachingDeviceAllocator::DeviceAllocate
cudaError_t DeviceAllocate(int device, void **d_ptr, size_t bytes, cudaStream_t active_stream=nullptr)
Provides a suitable allocation of device memory for the given size on the specified device.
Definition: CachingDeviceAllocator.h:350
notcub::CachingDeviceAllocator::BlockDescriptor::SizeCompare
static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b)
Definition: CachingDeviceAllocator.h:177
notcub::CachingDeviceAllocator::cached_bytes
GpuCachedBytes cached_bytes
Map of device ordinal to aggregate cached bytes on that device.
Definition: CachingDeviceAllocator.h:262
notcub::CachingDeviceAllocator::IntPow
static unsigned int IntPow(unsigned int base, unsigned int exp)
Definition: CachingDeviceAllocator.h:211
notcub::CachingDeviceAllocator::BlockDescriptor::ready_event
cudaEvent_t ready_event
Definition: CachingDeviceAllocator.h:153
notcub::CachingDeviceAllocator::cached_blocks
CachedBlocks cached_blocks
Set of cached device allocations available for reuse.
Definition: CachingDeviceAllocator.h:263
notcub::CachingDeviceAllocator::skip_cleanup
const bool skip_cleanup
Whether or not to skip a call to FreeAllCached() when the destructor is called.
Definition: CachingDeviceAllocator.h:259
mutex
static boost::mutex mutex
Definition: Proxy.cc:9
b
double b
Definition: hdecay.h:118
notcub::CachingDeviceAllocator::TotalBytes::TotalBytes
TotalBytes()
Definition: CachingDeviceAllocator.h:192
notcub::CachingDeviceAllocator::BlockDescriptor
Definition: CachingDeviceAllocator.h:147
notcub::CachingDeviceAllocator::NearestPowerOf
void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value)
Definition: CachingDeviceAllocator.h:226
a
double a
Definition: hdecay.h:119
beam_dqm_sourceclient-live_cfg.live
live
Definition: beam_dqm_sourceclient-live_cfg.py:24
notcub::CachingDeviceAllocator::TotalBytes::free
size_t free
Definition: CachingDeviceAllocator.h:190
createfilelist.int
int
Definition: createfilelist.py:10
notcub::CachingDeviceAllocator::~CachingDeviceAllocator
~CachingDeviceAllocator()
Destructor.
Definition: CachingDeviceAllocator.h:742
value
Definition: value.py:1
cudaCheck.h
notcub::CachingDeviceAllocator::BlockDescriptor::BlockDescriptor
BlockDescriptor(void *d_ptr, int device)
Definition: CachingDeviceAllocator.h:156
notcub::CachingDeviceAllocator::bin_growth
unsigned int bin_growth
Geometric growth factor for bin-sizes.
Definition: CachingDeviceAllocator.h:250
notcub::CachingDeviceAllocator::BlockDescriptor::device
int device
Definition: CachingDeviceAllocator.h:151
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
notcub::CachingDeviceAllocator::INVALID_BIN
static const unsigned int INVALID_BIN
Out-of-bounds bin.
Definition: CachingDeviceAllocator.h:130
notcub::CachingDeviceAllocator::CachingDeviceAllocator
CachingDeviceAllocator(unsigned int bin_growth, unsigned int min_bin=1, unsigned int max_bin=INVALID_BIN, size_t max_cached_bytes=INVALID_SIZE, bool skip_cleanup=false, bool debug=false)
Constructor.
Definition: CachingDeviceAllocator.h:275
notcub::CachingDeviceAllocator::Compare
bool(* Compare)(const BlockDescriptor &, const BlockDescriptor &)
BlockDescriptor comparator function interface.
Definition: CachingDeviceAllocator.h:186
notcub::CachingDeviceAllocator::max_cached_bytes
size_t max_cached_bytes
Maximum bin size.
Definition: CachingDeviceAllocator.h:256
notcub::CachingDeviceAllocator::BlockDescriptor::associated_stream
cudaStream_t associated_stream
Definition: CachingDeviceAllocator.h:152
notcub::CachingDeviceAllocator::SetMaxCachedBytes
cudaError_t SetMaxCachedBytes(size_t max_cached_bytes)
Sets the limit on the number bytes this allocator is allowed to cache per device.
Definition: CachingDeviceAllocator.h:325
notcub::CachingDeviceAllocator::BlockDescriptor::bytes
size_t bytes
Definition: CachingDeviceAllocator.h:149
notcub::CachingDeviceAllocator::GpuCachedBytes
std::map< int, TotalBytes > GpuCachedBytes
Map type of device ordinals to the number of cached bytes cached by each device.
Definition: CachingDeviceAllocator.h:202
JetChargeProducer_cfi.exp
exp
Definition: JetChargeProducer_cfi.py:6
notcub::CachingDeviceAllocator::debug
bool debug
Whether or not to print (de)allocation events to stdout.
Definition: CachingDeviceAllocator.h:260
notcub::CachingDeviceAllocator::INVALID_DEVICE_ORDINAL
static const int INVALID_DEVICE_ORDINAL
Invalid device ordinal.
Definition: CachingDeviceAllocator.h:138
newFWLiteAna.base
base
Definition: newFWLiteAna.py:92
begin
#define begin
Definition: vmac.h:32
notcub::CachingDeviceAllocator::BusyBlocks
std::multiset< BlockDescriptor, Compare > BusyBlocks
Set type for live blocks (ordered by ptr)
Definition: CachingDeviceAllocator.h:199