CMS 3D CMS Logo

CachingHostAllocator.h
Go to the documentation of this file.
1 #ifndef HeterogenousCore_CUDAUtilities_src_CachingHostAllocator_h
2 #define HeterogenousCore_CUDAUtilities_src_CachingHostAllocator_h
3 
4 /******************************************************************************
5  * Copyright (c) 2011, Duane Merrill. All rights reserved.
6  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  * * Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * * Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  * * Neither the name of the NVIDIA CORPORATION nor the
16  * names of its contributors may be used to endorse or promote products
17  * derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
23  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  ******************************************************************************/
31 
36 /******************************************************************************
37  * Simple caching allocator for pinned host memory allocations. The allocator is
38  * thread-safe.
39  ******************************************************************************/
40 
41 #include <cmath>
42 #include <map>
43 #include <set>
44 #include <mutex>
45 
47 
49 namespace notcub {
50 
56  /******************************************************************************
57  * CachingHostAllocator (host use)
58  ******************************************************************************/
59 
100  struct CachingHostAllocator {
101  //---------------------------------------------------------------------
102  // Constants
103  //---------------------------------------------------------------------
104 
106  static const unsigned int INVALID_BIN = (unsigned int)-1;
107 
109  static const size_t INVALID_SIZE = (size_t)-1;
110 
111 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
112 
114  static const int INVALID_DEVICE_ORDINAL = -1;
115 
116  //---------------------------------------------------------------------
117  // Type definitions and helper types
118  //---------------------------------------------------------------------
119 
 // Descriptor for a single pinned-host allocation: identifies the memory, its
 // size and size-bin, the device it is associated with, and the CUDA stream and
 // event used to decide when a returned block is safe to reuse.
 123  struct BlockDescriptor {
 124  void *d_ptr; // Host pointer
 125  size_t bytes; // Size of allocation in bytes
 126  unsigned int bin; // Bin enumeration
 127  int device; // device ordinal
 128  cudaStream_t associated_stream; // Associated associated_stream
 129  cudaEvent_t ready_event; // Signal when associated stream has run to the point at which this block was freed
 130 
 131  // Constructor (suitable for searching maps for a specific block, given its pointer)
 132  BlockDescriptor(void *d_ptr)
 133  : d_ptr(d_ptr),
 134  bytes(0),
 135  bin(INVALID_BIN),
 // NOTE(review): doxygen line 136 (presumably `device(INVALID_DEVICE_ORDINAL),`)
 // is missing from this extraction -- confirm against the original header.
 137  associated_stream(nullptr),
 138  ready_event(nullptr) {}
 139 
 140  // Constructor (suitable for searching maps for a range of suitable blocks)
 // NOTE(review): the default-constructor signature line (doxygen line 141,
 // `BlockDescriptor()` per the cross-reference index below) and the device
 // initializer (line 145) are missing from this extraction.
 142  : d_ptr(nullptr),
 143  bytes(0),
 144  bin(INVALID_BIN),
 146  associated_stream(nullptr),
 147  ready_event(nullptr) {}
 148 
 149  // Comparison functor for comparing host pointers
 150  static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b) { return (a.d_ptr < b.d_ptr); }
 151 
 152  // Comparison functor for comparing allocation sizes
 153  static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b) { return (a.bytes < b.bytes); }
 154  };
155 
157  typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &);
158 
159  class TotalBytes {
160  public:
161  size_t free;
162  size_t live;
163  TotalBytes() { free = live = 0; }
164  };
165 
167  typedef std::multiset<BlockDescriptor, Compare> CachedBlocks;
168 
170  typedef std::multiset<BlockDescriptor, Compare> BusyBlocks;
171 
172  //---------------------------------------------------------------------
173  // Utility functions
174  //---------------------------------------------------------------------
175 
179  static unsigned int IntPow(unsigned int base, unsigned int exp) {
180  unsigned int retval = 1;
181  while (exp > 0) {
182  if (exp & 1) {
183  retval = retval * base; // multiply the result by the current base
184  }
185  base = base * base; // square the base
186  exp = exp >> 1; // divide the exponent in half
187  }
188  return retval;
189  }
190 
194  void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value) {
195  power = 0;
196  rounded_bytes = 1;
197 
198  if (value * base < value) {
199  // Overflow
200  power = sizeof(size_t) * 8;
201  rounded_bytes = size_t(0) - 1;
202  return;
203  }
204 
205  while (rounded_bytes < value) {
206  rounded_bytes *= base;
207  power++;
208  }
209  }
210 
211  //---------------------------------------------------------------------
212  // Fields
213  //---------------------------------------------------------------------
214 
 // Allocator state. NOTE(review): the declarations of cached_bytes (TotalBytes),
 // cached_blocks (CachedBlocks) and live_blocks (BusyBlocks), referenced
 // throughout the methods below, appear to be missing from this extraction
 // (doxygen lines ~229-231) -- restore from the original header.
 // Mutex guarding the block sets and byte counters.
 215  std::mutex mutex;
 216 
 // Geometric growth factor for bin sizes.
 217  unsigned int bin_growth;
 // Minimum bin enumeration.
 218  unsigned int min_bin;
 // Maximum bin enumeration.
 219  unsigned int max_bin;
 220 
 // Minimum bin size in bytes.
 221  size_t min_bin_bytes;
 // Maximum bin size in bytes.
 222  size_t max_bin_bytes;
 // Maximum aggregate bytes the allocator may keep cached for reuse.
 223  size_t max_cached_bytes;
 // Whether to skip FreeAllCached() when the destructor runs (the CUDA runtime
 // may already have shut down by then).
 225  const bool
 226  skip_cleanup;
 // Whether to print (de)allocation events to stdout.
 227  bool debug;
228 
232 
233 #endif // DOXYGEN_SHOULD_SKIP_THIS
234 
235  //---------------------------------------------------------------------
236  // Methods
237  //---------------------------------------------------------------------
238 
 // NOTE(review): parameterized constructor. Per the cross-reference index below
 // the full signature is CachingHostAllocator(unsigned int bin_growth,
 // unsigned int min_bin = 1, unsigned int max_bin = INVALID_BIN,
 // size_t max_cached_bytes = INVALID_SIZE, bool skip_cleanup = false,
 // bool debug = false). Doxygen lines 242 (the signature line), 246-247 (the
 // max_cached_bytes and skip_cleanup parameter lines) and 250-256 (most of the
 // member-initializer list) are missing from this extraction -- restore from
 // the original header rather than guessing.
 243  unsigned int bin_growth,
 244  unsigned int min_bin = 1,
 245  unsigned int max_bin = INVALID_BIN,
 248  false,
 249  bool debug = false)
 257  debug(debug),
 258  cached_blocks(BlockDescriptor::SizeCompare),
 259  live_blocks(BlockDescriptor::PtrCompare) {}
260 
 // Default constructor: growth factor 8, bins 3..7 (so 8^3 .. 8^7 bytes), and a
 // cache ceiling just under three of the largest bin.
 // NOTE(review): doxygen lines 278-279 (presumably the min_bin_bytes and
 // max_bin_bytes initializers, via IntPow) and 281 (the skip_cleanup
 // initializer) are missing from this extraction -- restore from the original
 // header.
 274  CachingHostAllocator(bool skip_cleanup = false, bool debug = false)
 275  : bin_growth(8),
 276  min_bin(3),
 277  max_bin(7),
 280  max_cached_bytes((max_bin_bytes * 3) - 1),
 282  debug(debug),
 283  cached_blocks(BlockDescriptor::SizeCompare),
 284  live_blocks(BlockDescriptor::PtrCompare) {}
285 
292  void SetMaxCachedBytes(size_t max_cached_bytes) {
293  // Lock
294  mutex.lock();
295 
296  if (debug)
297  printf("Changing max_cached_bytes (%lld -> %lld)\n",
298  (long long)this->max_cached_bytes,
299  (long long)max_cached_bytes);
300 
301  this->max_cached_bytes = max_cached_bytes;
302 
303  // Unlock
304  mutex.unlock();
305  }
306 
312  cudaError_t HostAllocate(
313  void **d_ptr,
314  size_t bytes,
315  cudaStream_t active_stream = nullptr)
316  {
317  *d_ptr = nullptr;
318  int device = INVALID_DEVICE_ORDINAL;
319  cudaError_t error = cudaSuccess;
320 
321  cudaCheck(error = cudaGetDevice(&device));
322 
323  // Create a block descriptor for the requested allocation
324  bool found = false;
325  BlockDescriptor search_key;
326  search_key.device = device;
327  search_key.associated_stream = active_stream;
328  NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes);
329 
330  if (search_key.bin > max_bin) {
331  // Bin is greater than our maximum bin: allocate the request
332  // exactly and give out-of-bounds bin. It will not be cached
333  // for reuse when returned.
334  search_key.bin = INVALID_BIN;
335  search_key.bytes = bytes;
336  } else {
337  // Search for a suitable cached allocation: lock
338  mutex.lock();
339 
340  if (search_key.bin < min_bin) {
341  // Bin is less than minimum bin: round up
342  search_key.bin = min_bin;
343  search_key.bytes = min_bin_bytes;
344  }
345 
346  // Iterate through the range of cached blocks in the same bin
347  CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key);
348  while ((block_itr != cached_blocks.end()) && (block_itr->bin == search_key.bin)) {
349  // To prevent races with reusing blocks returned by the host but still
350  // in use for transfers, only consider cached blocks that are from an idle stream
351  if (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady) {
352  // Reuse existing cache block. Insert into live blocks.
353  found = true;
354  search_key = *block_itr;
355  search_key.associated_stream = active_stream;
356  if (search_key.device != device) {
357  // If "associated" device changes, need to re-create the event on the right device
358  cudaCheck(error = cudaSetDevice(search_key.device));
359  cudaCheck(error = cudaEventDestroy(search_key.ready_event));
360  cudaCheck(error = cudaSetDevice(device));
361  cudaCheck(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));
362  search_key.device = device;
363  }
364 
365  live_blocks.insert(search_key);
366 
367  // Remove from free blocks
368  cached_bytes.free -= search_key.bytes;
369  cached_bytes.live += search_key.bytes;
370 
371  if (debug)
372  printf(
373  "\tHost reused cached block at %p (%lld bytes) for stream %lld, event %lld on device %lld "
374  "(previously associated with stream %lld, event %lld).\n",
375  search_key.d_ptr,
376  (long long)search_key.bytes,
377  (long long)search_key.associated_stream,
378  (long long)search_key.ready_event,
379  (long long)search_key.device,
380  (long long)block_itr->associated_stream,
381  (long long)block_itr->ready_event);
382 
383  cached_blocks.erase(block_itr);
384 
385  break;
386  }
387  block_itr++;
388  }
389 
390  // Done searching: unlock
391  mutex.unlock();
392  }
393 
394  // Allocate the block if necessary
395  if (!found) {
396  // Attempt to allocate
397  // TODO: eventually support allocation flags
398  if ((error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault)) ==
399  cudaErrorMemoryAllocation) {
400  // The allocation attempt failed: free all cached blocks on device and retry
401  if (debug)
402  printf(
403  "\tHost failed to allocate %lld bytes for stream %lld on device %lld, retrying after freeing cached "
404  "allocations",
405  (long long)search_key.bytes,
406  (long long)search_key.associated_stream,
407  (long long)search_key.device);
408 
409  error = cudaSuccess; // Reset the error we will return
410  cudaGetLastError(); // Reset CUDART's error
411 
412  // Lock
413  mutex.lock();
414 
415  // Iterate the range of free blocks
416  CachedBlocks::iterator block_itr = cached_blocks.begin();
417 
418  while ((block_itr != cached_blocks.end())) {
419  // No need to worry about synchronization with the device: cudaFree is
420  // blocking and will synchronize across all kernels executing
421  // on the current device
422 
423  // Free pinned host memory.
424  if ((error = cudaFreeHost(block_itr->d_ptr)))
425  break;
426  if ((error = cudaEventDestroy(block_itr->ready_event)))
427  break;
428 
429  // Reduce balance and erase entry
430  cached_bytes.free -= block_itr->bytes;
431 
432  if (debug)
433  printf(
434  "\tHost freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
435  "bytes) outstanding.\n",
436  (long long)block_itr->bytes,
437  (long long)cached_blocks.size(),
438  (long long)cached_bytes.free,
439  (long long)live_blocks.size(),
440  (long long)cached_bytes.live);
441 
442  cached_blocks.erase(block_itr);
443 
444  block_itr++;
445  }
446 
447  // Unlock
448  mutex.unlock();
449 
450  // Return under error
451  if (error)
452  return error;
453 
454  // Try to allocate again
455  cudaCheck(error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault));
456  }
457 
458  // Create ready event
459  cudaCheck(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));
460 
461  // Insert into live blocks
462  mutex.lock();
463  live_blocks.insert(search_key);
464  cached_bytes.live += search_key.bytes;
465  mutex.unlock();
466 
467  if (debug)
468  printf(
469  "\tHost allocated new host block at %p (%lld bytes associated with stream %lld, event %lld on device "
470  "%lld).\n",
471  search_key.d_ptr,
472  (long long)search_key.bytes,
473  (long long)search_key.associated_stream,
474  (long long)search_key.ready_event,
475  (long long)search_key.device);
476  }
477 
478  // Copy host pointer to output parameter
479  *d_ptr = search_key.d_ptr;
480 
481  if (debug)
482  printf("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
483  (long long)cached_blocks.size(),
484  (long long)cached_bytes.free,
485  (long long)live_blocks.size(),
486  (long long)cached_bytes.live);
487 
488  return error;
489  }
490 
496  cudaError_t HostFree(void *d_ptr) {
497  int entrypoint_device = INVALID_DEVICE_ORDINAL;
498  cudaError_t error = cudaSuccess;
499 
500  // Lock
501  mutex.lock();
502 
503  // Find corresponding block descriptor
504  bool recached = false;
505  BlockDescriptor search_key(d_ptr);
506  BusyBlocks::iterator block_itr = live_blocks.find(search_key);
507  if (block_itr != live_blocks.end()) {
508  // Remove from live blocks
509  search_key = *block_itr;
510  live_blocks.erase(block_itr);
511  cached_bytes.live -= search_key.bytes;
512 
513  // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold
514  if ((search_key.bin != INVALID_BIN) && (cached_bytes.free + search_key.bytes <= max_cached_bytes)) {
515  // Insert returned allocation into free blocks
516  recached = true;
517  cached_blocks.insert(search_key);
518  cached_bytes.free += search_key.bytes;
519 
520  if (debug)
521  printf(
522  "\tHost returned %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld "
523  "available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
524  (long long)search_key.bytes,
525  (long long)search_key.associated_stream,
526  (long long)search_key.ready_event,
527  (long long)search_key.device,
528  (long long)cached_blocks.size(),
529  (long long)cached_bytes.free,
530  (long long)live_blocks.size(),
531  (long long)cached_bytes.live);
532  }
533  }
534 
535  cudaCheck(error = cudaGetDevice(&entrypoint_device));
536  if (entrypoint_device != search_key.device) {
537  cudaCheck(error = cudaSetDevice(search_key.device));
538  }
539 
540  if (recached) {
541  // Insert the ready event in the associated stream (must have current device set properly)
542  cudaCheck(error = cudaEventRecord(search_key.ready_event, search_key.associated_stream));
543  }
544 
545  // Unlock
546  mutex.unlock();
547 
548  if (!recached) {
549  // Free the allocation from the runtime and cleanup the event.
550  cudaCheck(error = cudaFreeHost(d_ptr));
551  cudaCheck(error = cudaEventDestroy(search_key.ready_event));
552 
553  if (debug)
554  printf(
555  "\tHost freed %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld available "
556  "blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
557  (long long)search_key.bytes,
558  (long long)search_key.associated_stream,
559  (long long)search_key.ready_event,
560  (long long)search_key.device,
561  (long long)cached_blocks.size(),
562  (long long)cached_bytes.free,
563  (long long)live_blocks.size(),
564  (long long)cached_bytes.live);
565  }
566 
567  // Reset device
568  if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != search_key.device)) {
569  cudaCheck(error = cudaSetDevice(entrypoint_device));
570  }
571 
572  return error;
573  }
574 
578  cudaError_t FreeAllCached() {
579  cudaError_t error = cudaSuccess;
580  int entrypoint_device = INVALID_DEVICE_ORDINAL;
581  int current_device = INVALID_DEVICE_ORDINAL;
582 
583  mutex.lock();
584 
585  while (!cached_blocks.empty()) {
586  // Get first block
587  CachedBlocks::iterator begin = cached_blocks.begin();
588 
589  // Get entry-point device ordinal if necessary
590  if (entrypoint_device == INVALID_DEVICE_ORDINAL) {
591  if ((error = cudaGetDevice(&entrypoint_device)))
592  break;
593  }
594 
595  // Set current device ordinal if necessary
596  if (begin->device != current_device) {
597  if ((error = cudaSetDevice(begin->device)))
598  break;
599  current_device = begin->device;
600  }
601 
602  // Free host memory
603  if ((error = cudaFreeHost(begin->d_ptr)))
604  break;
605  if ((error = cudaEventDestroy(begin->ready_event)))
606  break;
607 
608  // Reduce balance and erase entry
609  cached_bytes.free -= begin->bytes;
610 
611  if (debug)
612  printf(
613  "\tHost freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
614  "bytes) outstanding.\n",
615  (long long)begin->bytes,
616  (long long)cached_blocks.size(),
617  (long long)cached_bytes.free,
618  (long long)live_blocks.size(),
619  (long long)cached_bytes.live);
620 
621  cached_blocks.erase(begin);
622  }
623 
624  mutex.unlock();
625 
626  // Attempt to revert back to entry-point device if necessary
627  if (entrypoint_device != INVALID_DEVICE_ORDINAL) {
628  cudaCheck(error = cudaSetDevice(entrypoint_device));
629  }
630 
631  return error;
632  }
633 
 // Destructor body: frees all cached allocations unless cleanup was disabled.
 // NOTE(review): the signature line (doxygen line 637, `~CachingHostAllocator()`
 // per the cross-reference index below) is missing from this extraction.
 // skip_cleanup exists because the CUDA runtime may already have been torn down
 // when a statically-declared allocator is destroyed -- see the field's brief.
 638  if (!skip_cleanup)
 639  FreeAllCached();
 640  }
641  };
642  // end group UtilMgmt
644 
645 } // namespace notcub
646 
647 #endif
notcub::CachingHostAllocator::max_cached_bytes
size_t max_cached_bytes
Maximum bin size.
Definition: CachingHostAllocator.h:248
notcub::CachingHostAllocator::BlockDescriptor::device
int device
Definition: CachingHostAllocator.h:152
notcub::CachingHostAllocator::cached_blocks
CachedBlocks cached_blocks
Aggregate cached bytes.
Definition: CachingHostAllocator.h:255
notcub::CachingHostAllocator::TotalBytes::live
size_t live
Definition: CachingHostAllocator.h:187
electrons_cff.bool
bool
Definition: electrons_cff.py:372
notcub::CachingHostAllocator::bin_growth
unsigned int bin_growth
Mutex for thread-safety.
Definition: CachingHostAllocator.h:242
notcub::CachingHostAllocator::max_bin_bytes
size_t max_bin_bytes
Minimum bin size.
Definition: CachingHostAllocator.h:247
notcub::CachingHostAllocator::skip_cleanup
const bool skip_cleanup
Maximum aggregate cached bytes.
Definition: CachingHostAllocator.h:251
notcub::CachingHostAllocator::HostAllocate
cudaError_t HostAllocate(void **d_ptr, size_t bytes, cudaStream_t active_stream=nullptr)
Provides a suitable allocation of pinned host memory for the given size.
Definition: CachingHostAllocator.h:337
newFWLiteAna.found
found
Definition: newFWLiteAna.py:118
relativeConstraints.error
error
Definition: relativeConstraints.py:53
notcub::CachingHostAllocator::BlockDescriptor::BlockDescriptor
BlockDescriptor()
Definition: CachingHostAllocator.h:166
notcub
CUB namespace.
Definition: CachingDeviceAllocator.h:47
notcub::CachingHostAllocator::BlockDescriptor::PtrCompare
static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b)
Definition: CachingHostAllocator.h:175
notcub::CachingHostAllocator::cached_bytes
TotalBytes cached_bytes
Whether or not to print (de)allocation events to stdout.
Definition: CachingHostAllocator.h:254
notcub::CachingHostAllocator::BusyBlocks
std::multiset< BlockDescriptor, Compare > BusyBlocks
Set type for live blocks (ordered by ptr)
Definition: CachingHostAllocator.h:195
notcub::CachingHostAllocator::~CachingHostAllocator
~CachingHostAllocator()
Destructor.
Definition: CachingHostAllocator.h:662
notcub::CachingHostAllocator::mutex
std::mutex mutex
Definition: CachingHostAllocator.h:240
notcub::CachingHostAllocator::INVALID_BIN
static const unsigned int INVALID_BIN
Out-of-bounds bin.
Definition: CachingHostAllocator.h:131
notcub::CachingHostAllocator::BlockDescriptor::SizeCompare
static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b)
Definition: CachingHostAllocator.h:178
notcub::CachingHostAllocator::INVALID_SIZE
static const size_t INVALID_SIZE
Invalid size.
Definition: CachingHostAllocator.h:134
mutex
static boost::mutex mutex
Definition: Proxy.cc:9
b
double b
Definition: hdecay.h:118
notcub::CachingHostAllocator::debug
bool debug
Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may hav...
Definition: CachingHostAllocator.h:252
notcub::CachingHostAllocator::INVALID_DEVICE_ORDINAL
static const int INVALID_DEVICE_ORDINAL
Invalid device ordinal.
Definition: CachingHostAllocator.h:139
notcub::CachingHostAllocator::BlockDescriptor::ready_event
cudaEvent_t ready_event
Definition: CachingHostAllocator.h:154
a
double a
Definition: hdecay.h:119
notcub::CachingHostAllocator::CachingHostAllocator
CachingHostAllocator(unsigned int bin_growth, unsigned int min_bin=1, unsigned int max_bin=INVALID_BIN, size_t max_cached_bytes=INVALID_SIZE, bool skip_cleanup=false, bool debug=false)
Set of live pinned host allocations currently in use.
Definition: CachingHostAllocator.h:267
notcub::CachingHostAllocator::SetMaxCachedBytes
void SetMaxCachedBytes(size_t max_cached_bytes)
Sets the limit on the number bytes this allocator is allowed to cache.
Definition: CachingHostAllocator.h:317
createfilelist.int
int
Definition: createfilelist.py:10
value
Definition: value.py:1
cudaCheck.h
notcub::CachingHostAllocator::TotalBytes::TotalBytes
TotalBytes()
Definition: CachingHostAllocator.h:188
notcub::CachingHostAllocator::BlockDescriptor::d_ptr
void * d_ptr
Definition: CachingHostAllocator.h:149
notcub::CachingHostAllocator::CachedBlocks
std::multiset< BlockDescriptor, Compare > CachedBlocks
Set type for cached blocks (ordered by size)
Definition: CachingHostAllocator.h:192
notcub::CachingHostAllocator::min_bin_bytes
size_t min_bin_bytes
Maximum bin enumeration.
Definition: CachingHostAllocator.h:246
cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:62
notcub::CachingHostAllocator::FreeAllCached
cudaError_t FreeAllCached()
Frees all cached pinned host allocations.
Definition: CachingHostAllocator.h:603
notcub::CachingHostAllocator::BlockDescriptor::bytes
size_t bytes
Definition: CachingHostAllocator.h:150
notcub::CachingHostAllocator::live_blocks
BusyBlocks live_blocks
Set of cached pinned host allocations available for reuse.
Definition: CachingHostAllocator.h:256
notcub::CachingHostAllocator::max_bin
unsigned int max_bin
Minimum bin enumeration.
Definition: CachingHostAllocator.h:244
notcub::CachingHostAllocator::IntPow
static unsigned int IntPow(unsigned int base, unsigned int exp)
Definition: CachingHostAllocator.h:204
notcub::CachingHostAllocator::BlockDescriptor::associated_stream
cudaStream_t associated_stream
Definition: CachingHostAllocator.h:153
notcub::CachingHostAllocator::BlockDescriptor
Definition: CachingHostAllocator.h:148
notcub::CachingHostAllocator::TotalBytes::free
size_t free
Definition: CachingHostAllocator.h:186
notcub::CachingHostAllocator::min_bin
unsigned int min_bin
Geometric growth factor for bin-sizes.
Definition: CachingHostAllocator.h:243
notcub::CachingHostAllocator::TotalBytes
Definition: CachingHostAllocator.h:184
notcub::CachingHostAllocator::HostFree
cudaError_t HostFree(void *d_ptr)
Frees a live allocation of pinned host memory, returning it to the allocator.
Definition: CachingHostAllocator.h:521
JetChargeProducer_cfi.exp
exp
Definition: JetChargeProducer_cfi.py:6
newFWLiteAna.base
base
Definition: newFWLiteAna.py:92
notcub::CachingHostAllocator::NearestPowerOf
void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value)
Definition: CachingHostAllocator.h:219
begin
#define begin
Definition: vmac.h:32
notcub::CachingHostAllocator::Compare
bool(* Compare)(const BlockDescriptor &, const BlockDescriptor &)
BlockDescriptor comparator function interface.
Definition: CachingHostAllocator.h:182
notcub::CachingHostAllocator::BlockDescriptor::bin
unsigned int bin
Definition: CachingHostAllocator.h:151