dd/d5a/CachingHostAllocator_8h_source.html

 #ifndef HeterogenousCore_CUDAUtilities_src_CachingHostAllocator_h
 #define HeterogenousCore_CUDAUtilities_src_CachingHostAllocator_h

 /******************************************************************************
  * Copyright (c) 2011, Duane Merrill.  All rights reserved.
  * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright
  *       notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright
  *       notice, this list of conditions and the following disclaimer in the
  *       documentation and/or other materials provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the
  *       names of its contributors may be used to endorse or promote products
  *       derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  ******************************************************************************/

 /******************************************************************************
  * Simple caching allocator for pinned host memory allocations. The allocator is
  * thread-safe.
  ******************************************************************************/

 #include <cmath>
 #include <map>
 #include <set>
 #include <mutex>

 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

 namespace notcub {

   /******************************************************************************
  * CachingHostAllocator (host use)
  ******************************************************************************/

   struct CachingHostAllocator {
     //---------------------------------------------------------------------
     // Constants
     //---------------------------------------------------------------------

     // Out-of-bounds bin marker (all bits set). Blocks carrying this bin are
     // never returned to the cache by HostFree().
     static const unsigned int INVALID_BIN = (unsigned int)-1;

     // Invalid size marker (all bits set); used as the default
     // max_cached_bytes of the fully parameterized constructor.
     static const size_t INVALID_SIZE = (size_t)-1;

 #ifndef DOXYGEN_SHOULD_SKIP_THIS  // Do not document

     // Invalid device ordinal; the initial value of a BlockDescriptor's device.
     static const int INVALID_DEVICE_ORDINAL = -1;

     //---------------------------------------------------------------------
     // Type definitions and helper types
     //---------------------------------------------------------------------

     // Bookkeeping record for one pinned host allocation, stored in both the
     // cached (free) and live (in use) block sets.
     struct BlockDescriptor {
       void *d_ptr;                     // Pointer to the pinned host allocation
       size_t bytes;                    // Allocation size in bytes
       unsigned int bin;                // Bin enumeration (INVALID_BIN when not binned)
       int device;                      // Ordinal of the device the block is associated with
       cudaStream_t associated_stream;  // Stream the block was last handed out for
       cudaEvent_t ready_event;         // Recorded on associated_stream when the block is returned to the cache

       // Builds a descriptor that carries only a pointer; every other field
       // takes its "invalid"/empty value. Used as a lookup key by pointer.
       BlockDescriptor(void *d_ptr)
           : d_ptr(d_ptr),
             bytes(0),
             bin(INVALID_BIN),
             device(INVALID_DEVICE_ORDINAL),
             associated_stream(nullptr),
             ready_event(nullptr) {}

       // Builds an empty descriptor (null pointer, zero size); used as a
       // lookup key whose size/bin fields are filled in by the caller.
       BlockDescriptor() : BlockDescriptor(nullptr) {}

       // Strict ordering by host pointer
       static bool PtrCompare(const BlockDescriptor &lhs, const BlockDescriptor &rhs) { return lhs.d_ptr < rhs.d_ptr; }

       // Strict ordering by allocation size
       static bool SizeCompare(const BlockDescriptor &lhs, const BlockDescriptor &rhs) { return lhs.bytes < rhs.bytes; }
     };

     // Signature of the ordering predicates used by the block sets
     using Compare = bool (*)(const BlockDescriptor &, const BlockDescriptor &);

     // Running byte totals for cached (free) and handed-out (live) blocks.
     class TotalBytes {
     public:
       size_t free;  // bytes currently held in the cache
       size_t live;  // bytes currently handed out to callers

       // Both counters start at zero.
       TotalBytes() : free(0), live(0) {}
     };

     // Container type for cached blocks (constructed with SizeCompare)
     using CachedBlocks = std::multiset<BlockDescriptor, Compare>;

     // Container type for live blocks (constructed with PtrCompare)
     using BusyBlocks = std::multiset<BlockDescriptor, Compare>;

     //---------------------------------------------------------------------
     // Utility functions
     //---------------------------------------------------------------------

     // Integer exponentiation by squaring: returns base^exp computed in
     // unsigned int arithmetic (results wrap modulo 2^N on overflow).
     static unsigned int IntPow(unsigned int base, unsigned int exp) {
       unsigned int result = 1;
       // Consume the exponent one bit at a time, squaring the base per bit.
       for (; exp != 0; exp >>= 1, base *= base) {
         if ((exp & 1u) != 0) {
           result *= base;  // this bit of the exponent contributes the current power
         }
       }
       return result;
     }

     // Rounds `value` up to the nearest power of `base`.
     //
     // On return, `rounded_bytes` is the smallest base^power that is >= value
     // and `power` is that exponent (value 0 or 1 yields power 0, size 1).
     //
     // If no such power is representable in size_t, or `base` is smaller than 2
     // and therefore can never reach a value above 1, the overflow sentinel is
     // returned instead: power = number of bits in size_t, rounded_bytes = the
     // maximum size_t value.
     void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value) {
       const size_t max_size = ~size_t(0);

       power = 0;
       rounded_bytes = 1;

       while (rounded_bytes < value) {
         // Check before multiplying: a wrapped product could compare below
         // `value` forever (or land on a bogus size), and base 0/1 never grows.
         if (base < 2 || rounded_bytes > max_size / base) {
           power = sizeof(size_t) * 8;
           rounded_bytes = max_size;
           return;
         }
         rounded_bytes *= base;
         power++;
       }
     }

     //---------------------------------------------------------------------
     // Fields
     //---------------------------------------------------------------------

     // Guards cached_blocks, live_blocks, cached_bytes and max_cached_bytes
     std::mutex mutex;

     // Geometric growth factor between consecutive bin sizes
     unsigned int bin_growth;
     // Smallest bin enumeration; smaller requests are rounded up to this bin
     unsigned int min_bin;
     // Largest bin enumeration; larger requests are allocated exactly and not cached
     unsigned int max_bin;

     // IntPow(bin_growth, min_bin): size of the smallest bin
     size_t min_bin_bytes;
     // IntPow(bin_growth, max_bin): size of the largest bin
     size_t max_bin_bytes;
     // Upper limit on the total bytes HostFree() keeps in the cache
     size_t max_cached_bytes;

     // When true, the destructor does not call FreeAllCached()
     const bool
         skip_cleanup;
     // When true, (de)allocation events are printed with printf
     bool debug;

     // Aggregate byte counts of cached (free) and live blocks
     TotalBytes cached_bytes;
     // Cached blocks available for reuse, ordered by size
     CachedBlocks cached_blocks;
     // Blocks currently handed out, ordered by pointer
     BusyBlocks live_blocks;

 #endif  // DOXYGEN_SHOULD_SKIP_THIS

     //---------------------------------------------------------------------
     // Methods
     //---------------------------------------------------------------------

     // Constructs an allocator with an explicit bin configuration.
     //
     //   bin_growth        geometric growth factor between bin sizes
     //   min_bin           smallest bin enumeration (default 1)
     //   max_bin           largest bin enumeration (default INVALID_BIN, i.e. no upper bin limit)
     //   max_cached_bytes  limit on bytes kept in the cache (default INVALID_SIZE)
     //   skip_cleanup      if true, the destructor does not call FreeAllCached()
     //   debug             if true, print (de)allocation events with printf
     //
     // The bin byte sizes are derived with IntPow(), which computes in unsigned
     // int; with the default max_bin the exponent is INVALID_BIN, so the
     // resulting max_bin_bytes is a wrapped value rather than a meaningful size.
     CachingHostAllocator(
         unsigned int bin_growth,
         unsigned int min_bin = 1,
         unsigned int max_bin = INVALID_BIN,
         size_t max_cached_bytes = INVALID_SIZE,
         bool skip_cleanup =
             false,
         bool debug = false)
         : bin_growth(bin_growth),
           min_bin(min_bin),
           max_bin(max_bin),
           min_bin_bytes(IntPow(bin_growth, min_bin)),
           max_bin_bytes(IntPow(bin_growth, max_bin)),
           max_cached_bytes(max_cached_bytes),
           skip_cleanup(skip_cleanup),
           debug(debug),
           cached_blocks(BlockDescriptor::SizeCompare),
           live_blocks(BlockDescriptor::PtrCompare) {}

     // Constructs an allocator with the default bin configuration:
     //   bin_growth = 8, min_bin = 3, max_bin = 7
     //   min_bin_bytes = 8^3 = 512, max_bin_bytes = 8^7 = 2097152
     //   max_cached_bytes = 3 * max_bin_bytes - 1 = 6291455
     //
     //   skip_cleanup  if true, the destructor does not call FreeAllCached()
     //   debug         if true, print (de)allocation events with printf
     //
     // The derived initializers read the members initialized before them, so
     // they rely on the member declaration order (bin_growth, min_bin, max_bin,
     // min_bin_bytes, max_bin_bytes, max_cached_bytes).
     CachingHostAllocator(bool skip_cleanup = false, bool debug = false)
         : bin_growth(8),
           min_bin(3),
           max_bin(7),
           min_bin_bytes(IntPow(bin_growth, min_bin)),
           max_bin_bytes(IntPow(bin_growth, max_bin)),
           max_cached_bytes((max_bin_bytes * 3) - 1),
           skip_cleanup(skip_cleanup),
           debug(debug),
           cached_blocks(BlockDescriptor::SizeCompare),
           live_blocks(BlockDescriptor::PtrCompare) {}

     // Replaces the limit on the number of bytes this allocator may keep
     // cached. The update (and the optional debug message) happens while
     // holding the allocator mutex.
     void SetMaxCachedBytes(size_t max_cached_bytes) {
       // Scoped lock: released automatically when the function returns
       std::lock_guard<std::mutex> guard(mutex);

       if (debug) {
         const long long previous_limit = (long long)this->max_cached_bytes;
         const long long requested_limit = (long long)max_cached_bytes;
         printf("Changing max_cached_bytes (%lld -> %lld)\n", previous_limit, requested_limit);
       }

       this->max_cached_bytes = max_cached_bytes;
     }

     // Provides a pinned host allocation of at least `bytes` bytes.
     //
     //   d_ptr          [out] receives the allocation; set to nullptr on entry
     //   bytes          minimum number of bytes requested
     //   active_stream  stream the returned block becomes associated with
     //
     // The request is rounded up to a power of bin_growth (at least min_bin).
     // A cached block of that bin whose ready event is no longer pending is
     // reused when available; otherwise new memory is obtained with
     // cudaHostAlloc. Requests whose bin exceeds max_bin are allocated with the
     // exact requested size and tagged INVALID_BIN, so HostFree() will not
     // cache them.
     //
     // If cudaHostAlloc reports cudaErrorMemoryAllocation, all cached blocks are
     // released and the allocation is attempted once more.
     //
     // Returns cudaSuccess on success. Failures of cudaHostAlloc (other than the
     // retried out-of-memory case), cudaFreeHost and cudaEventDestroy are
     // returned to the caller with *d_ptr left as nullptr; the remaining CUDA
     // calls are wrapped in cudaCheck, which handles their failures itself.
     cudaError_t HostAllocate(
         void **d_ptr,
         size_t bytes,
         cudaStream_t active_stream = nullptr)
     {
       std::unique_lock<std::mutex> mutex_locker(mutex, std::defer_lock);
       *d_ptr = nullptr;
       int device = INVALID_DEVICE_ORDINAL;
       cudaError_t error = cudaSuccess;

       cudaCheck(error = cudaGetDevice(&device));

       // Create a block descriptor for the requested allocation
       bool found = false;
       BlockDescriptor search_key;
       search_key.device = device;
       search_key.associated_stream = active_stream;
       NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes);

       if (search_key.bin > max_bin) {
         // Bin is greater than our maximum bin: allocate the request
         // exactly and give out-of-bounds bin.  It will not be cached
         // for reuse when returned.
         search_key.bin = INVALID_BIN;
         search_key.bytes = bytes;
       } else {
         // Search for a suitable cached allocation: lock
         mutex_locker.lock();

         if (search_key.bin < min_bin) {
           // Bin is less than minimum bin: round up
           search_key.bin = min_bin;
           search_key.bytes = min_bin_bytes;
         }

         // Iterate through the range of cached blocks in the same bin
         CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key);
         while ((block_itr != cached_blocks.end()) && (block_itr->bin == search_key.bin)) {
           // To prevent races with reusing blocks returned by the host but still
           // in use for transfers, only consider cached blocks that are from an idle stream
           if (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady) {
             // Reuse existing cache block.  Insert into live blocks.
             found = true;
             search_key = *block_itr;
             search_key.associated_stream = active_stream;
             if (search_key.device != device) {
               // If "associated" device changes, need to re-create the event on the right device
               cudaCheck(error = cudaSetDevice(search_key.device));
               cudaCheck(error = cudaEventDestroy(search_key.ready_event));
               cudaCheck(error = cudaSetDevice(device));
               cudaCheck(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));
               search_key.device = device;
             }

             live_blocks.insert(search_key);

             // Remove from free blocks
             cached_bytes.free -= search_key.bytes;
             cached_bytes.live += search_key.bytes;

             if (debug)
               printf(
                   "\tHost reused cached block at %p (%lld bytes) for stream %lld, event %lld on device %lld "
                   "(previously associated with stream %lld, event %lld).\n",
                   search_key.d_ptr,
                   (long long)search_key.bytes,
                   (long long)search_key.associated_stream,
                   (long long)search_key.ready_event,
                   (long long)search_key.device,
                   (long long)block_itr->associated_stream,
                   (long long)block_itr->ready_event);

             cached_blocks.erase(block_itr);

             break;
           }
           block_itr++;
         }

         // Done searching: unlock
         mutex_locker.unlock();
       }

       // Allocate the block if necessary
       if (!found) {
         // Attempt to allocate
         // TODO: eventually support allocation flags
         error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault);
         if (error == cudaErrorMemoryAllocation) {
           // The allocation attempt failed: free all cached blocks and retry
           if (debug)
             printf(
                 "\tHost failed to allocate %lld bytes for stream %lld on device %lld, retrying after freeing cached "
                 "allocations",
                 (long long)search_key.bytes,
                 (long long)search_key.associated_stream,
                 (long long)search_key.device);

           error = cudaSuccess;  // Reset the error we will return
           cudaGetLastError();   // Reset CUDART's error

           // Lock
           mutex_locker.lock();

           // Iterate the range of free blocks
           CachedBlocks::iterator block_itr = cached_blocks.begin();

           while ((block_itr != cached_blocks.end())) {
             // Free pinned host memory and the event that belongs to it.
             if ((error = cudaFreeHost(block_itr->d_ptr)))
               break;
             if ((error = cudaEventDestroy(block_itr->ready_event)))
               break;

             // Reduce balance and erase entry
             cached_bytes.free -= block_itr->bytes;

             if (debug)
               printf(
                   "\tHost freed %lld bytes.\n\t\t  %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
                   "bytes) outstanding.\n",
                   (long long)block_itr->bytes,
                   (long long)cached_blocks.size(),
                   (long long)cached_bytes.free,
                   (long long)live_blocks.size(),
                   (long long)cached_bytes.live);

             // erase() invalidates block_itr; continue from the iterator it
             // returns instead of incrementing the erased one.
             block_itr = cached_blocks.erase(block_itr);
           }

           // Unlock
           mutex_locker.unlock();

           // Return under error
           if (error)
             return error;

           // Try to allocate again
           cudaCheck(error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault));
         } else if (error != cudaSuccess) {
           // Any other allocation failure: report it rather than recording a
           // block with no memory behind it as live.
           return error;
         }

         // Create ready event
         cudaCheck(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));

         // Insert into live blocks
         mutex_locker.lock();
         live_blocks.insert(search_key);
         cached_bytes.live += search_key.bytes;
         mutex_locker.unlock();

         if (debug)
           printf(
               "\tHost allocated new host block at %p (%lld bytes associated with stream %lld, event %lld on device "
               "%lld).\n",
               search_key.d_ptr,
               (long long)search_key.bytes,
               (long long)search_key.associated_stream,
               (long long)search_key.ready_event,
               (long long)search_key.device);
       }

       // Copy host pointer to output parameter
       *d_ptr = search_key.d_ptr;

       // NOTE(review): these statistics are read after the mutex was released.
       if (debug)
         printf("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
                (long long)cached_blocks.size(),
                (long long)cached_bytes.free,
                (long long)live_blocks.size(),
                (long long)cached_bytes.live);

       return error;
     }

     // Returns a pinned host allocation to the allocator.
     //
     //   d_ptr  pointer previously obtained from HostAllocate()
     //
     // A block that has a valid bin and still fits under max_cached_bytes is
     // moved to the cache, and its ready event is recorded on the stream it was
     // associated with (with that block's device made current for the call).
     // Any other tracked block is released with cudaFreeHost and its event is
     // destroyed.
     //
     // A pointer this allocator does not track (including nullptr) has no
     // device or event recorded for it, so it is only forwarded to
     // cudaFreeHost; no device switch or event destruction is attempted.
     //
     // The current device is restored before returning. CUDA calls are wrapped
     // in cudaCheck, which handles their failures; the status of the last call
     // is returned.
     cudaError_t HostFree(void *d_ptr) {
       int entrypoint_device = INVALID_DEVICE_ORDINAL;
       cudaError_t error = cudaSuccess;

       // Lock
       std::unique_lock<std::mutex> mutex_locker(mutex);

       // Find corresponding block descriptor
       bool recached = false;
       BlockDescriptor search_key(d_ptr);
       BusyBlocks::iterator block_itr = live_blocks.find(search_key);
       const bool tracked = (block_itr != live_blocks.end());
       if (tracked) {
         // Remove from live blocks
         search_key = *block_itr;
         live_blocks.erase(block_itr);
         cached_bytes.live -= search_key.bytes;

         // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold
         if ((search_key.bin != INVALID_BIN) && (cached_bytes.free + search_key.bytes <= max_cached_bytes)) {
           // Insert returned allocation into free blocks
           recached = true;
           cached_blocks.insert(search_key);
           cached_bytes.free += search_key.bytes;

           if (debug)
             printf(
                 "\tHost returned %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld "
                 "available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
                 (long long)search_key.bytes,
                 (long long)search_key.associated_stream,
                 (long long)search_key.ready_event,
                 (long long)search_key.device,
                 (long long)cached_blocks.size(),
                 (long long)cached_bytes.free,
                 (long long)live_blocks.size(),
                 (long long)cached_bytes.live);
         }
       }

       cudaCheck(error = cudaGetDevice(&entrypoint_device));

       // Only a tracked block carries a meaningful device ordinal; an untracked
       // key still holds INVALID_DEVICE_ORDINAL and must not be passed to
       // cudaSetDevice.
       const bool switch_device = tracked && (entrypoint_device != search_key.device);
       if (switch_device) {
         cudaCheck(error = cudaSetDevice(search_key.device));
       }

       if (recached) {
         // Insert the ready event in the associated stream (must have current device set properly)
         cudaCheck(error = cudaEventRecord(search_key.ready_event, search_key.associated_stream));
       }

       // Unlock
       mutex_locker.unlock();

       if (!recached) {
         // Free the allocation from the runtime and cleanup the event.
         cudaCheck(error = cudaFreeHost(d_ptr));
         if (tracked) {
           // An untracked key has no event (ready_event is nullptr).
           cudaCheck(error = cudaEventDestroy(search_key.ready_event));
         }

         // NOTE(review): these statistics are read after the mutex was released.
         if (debug)
           printf(
               "\tHost freed %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t  %lld available "
               "blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
               (long long)search_key.bytes,
               (long long)search_key.associated_stream,
               (long long)search_key.ready_event,
               (long long)search_key.device,
               (long long)cached_blocks.size(),
               (long long)cached_bytes.free,
               (long long)live_blocks.size(),
               (long long)cached_bytes.live);
       }

       // Reset device
       if (switch_device) {
         cudaCheck(error = cudaSetDevice(entrypoint_device));
       }

       return error;
     }

     // Releases every cached (free) block: its pinned memory is freed with
     // cudaFreeHost and its ready event destroyed, with the block's device made
     // current for those calls. Live blocks are not touched.
     //
     // Stops at the first failing CUDA call and returns that error; blocks not
     // yet processed stay in the cache. The device that was current on entry is
     // restored before returning, and a failure while freeing is not masked by
     // a successful restore.
     cudaError_t FreeAllCached() {
       cudaError_t error = cudaSuccess;
       int entrypoint_device = INVALID_DEVICE_ORDINAL;
       int current_device = INVALID_DEVICE_ORDINAL;

       std::unique_lock<std::mutex> mutex_locker(mutex);

       while (!cached_blocks.empty()) {
         // Get first block
         CachedBlocks::iterator begin = cached_blocks.begin();

         // Get entry-point device ordinal if necessary
         if (entrypoint_device == INVALID_DEVICE_ORDINAL) {
           if ((error = cudaGetDevice(&entrypoint_device)))
             break;
         }

         // Set current device ordinal if necessary
         if (begin->device != current_device) {
           if ((error = cudaSetDevice(begin->device)))
             break;
           current_device = begin->device;
         }

         // Free host memory
         if ((error = cudaFreeHost(begin->d_ptr)))
           break;
         if ((error = cudaEventDestroy(begin->ready_event)))
           break;

         // Reduce balance and erase entry
         cached_bytes.free -= begin->bytes;

         if (debug)
           printf(
               "\tHost freed %lld bytes.\n\t\t  %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
               "bytes) outstanding.\n",
               (long long)begin->bytes,
               (long long)cached_blocks.size(),
               (long long)cached_bytes.free,
               (long long)live_blocks.size(),
               (long long)cached_bytes.live);

         cached_blocks.erase(begin);
       }

       mutex_locker.unlock();

       // Attempt to revert back to entry-point device if necessary
       if (entrypoint_device != INVALID_DEVICE_ORDINAL) {
         // Use a separate status so an error from the loop above is still
         // the one reported to the caller.
         cudaError_t restore_error = cudaSuccess;
         cudaCheck(restore_error = cudaSetDevice(entrypoint_device));
         if (error == cudaSuccess) {
           error = restore_error;
         }
       }

       return error;
     }

     // Destructor: releases all cached blocks unless cleanup was disabled at
     // construction time.
     ~CachingHostAllocator() {
       if (skip_cleanup) {
         return;
       }
       FreeAllCached();
     }
   };
   // end group UtilMgmt

 }  // namespace notcub

 #endif
notcub::CachingHostAllocator::TotalBytes::TotalBytes
TotalBytes()
Definition: CachingHostAllocator.h:163

notcub::CachingHostAllocator::BlockDescriptor::device
int device
Definition: CachingHostAllocator.h:127

notcub::CachingHostAllocator::Compare
bool(* Compare)(const BlockDescriptor &, const BlockDescriptor &)
BlockDescriptor comparator function interface.
Definition: CachingHostAllocator.h:157

notcub::CachingHostAllocator::CachingHostAllocator
CachingHostAllocator(bool skip_cleanup=false, bool debug=false)
Default constructor.
Definition: CachingHostAllocator.h:274

notcub::CachingHostAllocator::BlockDescriptor::SizeCompare
static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b)
Definition: CachingHostAllocator.h:153

notcub::CachingHostAllocator::IntPow
static unsigned int IntPow(unsigned int base, unsigned int exp)
Definition: CachingHostAllocator.h:179

notcub::CachingHostAllocator::BlockDescriptor::bytes
size_t bytes
Definition: CachingHostAllocator.h:125

notcub::CachingHostAllocator::BlockDescriptor::PtrCompare
static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b)
Definition: CachingHostAllocator.h:150

relativeConstraints.error
error
Definition: relativeConstraints.py:53

notcub
CUB namespace.
Definition: CachingDeviceAllocator.h:50

notcub::CachingHostAllocator::SetMaxCachedBytes
void SetMaxCachedBytes(size_t max_cached_bytes)
Sets the limit on the number of bytes this allocator is allowed to cache.
Definition: CachingHostAllocator.h:292

mutex
static std::mutex mutex
Definition: Proxy.cc:8

newFWLiteAna.base
base
Main Program
Definition: newFWLiteAna.py:92

notcub::CachingHostAllocator::NearestPowerOf
void NearestPowerOf(unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value)
Definition: CachingHostAllocator.h:194

notcub::CachingHostAllocator::BlockDescriptor::BlockDescriptor
BlockDescriptor(void *d_ptr)
Definition: CachingHostAllocator.h:132

notcub::CachingHostAllocator::mutex
std::mutex mutex
Definition: CachingHostAllocator.h:215

notcub::CachingHostAllocator::BlockDescriptor::ready_event
cudaEvent_t ready_event
Definition: CachingHostAllocator.h:129

notcub::CachingHostAllocator::cached_blocks
CachedBlocks cached_blocks
Set of cached pinned host allocations available for reuse.
Definition: CachingHostAllocator.h:230

notcub::CachingHostAllocator::TotalBytes::free
size_t free
Definition: CachingHostAllocator.h:161

notcub::CachingHostAllocator::CachedBlocks
std::multiset< BlockDescriptor, Compare > CachedBlocks
Set type for cached blocks (ordered by size)
Definition: CachingHostAllocator.h:167

notcub::CachingHostAllocator::min_bin_bytes
size_t min_bin_bytes
Minimum bin size.
Definition: CachingHostAllocator.h:221

notcub::CachingHostAllocator::BlockDescriptor::d_ptr
void * d_ptr
Definition: CachingHostAllocator.h:124

notcub::CachingHostAllocator::BlockDescriptor::associated_stream
cudaStream_t associated_stream
Definition: CachingHostAllocator.h:128

createfilelist.int
int
Definition: createfilelist.py:10

notcub::CachingHostAllocator::INVALID_SIZE
static const size_t INVALID_SIZE
Invalid size.
Definition: CachingHostAllocator.h:109

notcub::CachingHostAllocator::CachingHostAllocator
CachingHostAllocator(unsigned int bin_growth, unsigned int min_bin=1, unsigned int max_bin=INVALID_BIN, size_t max_cached_bytes=INVALID_SIZE, bool skip_cleanup=false, bool debug=false)
Constructor.
Definition: CachingHostAllocator.h:242

notcub::CachingHostAllocator::max_bin
unsigned int max_bin
Maximum bin enumeration.
Definition: CachingHostAllocator.h:219

notcub::CachingHostAllocator::max_cached_bytes
size_t max_cached_bytes
Maximum aggregate cached bytes.
Definition: CachingHostAllocator.h:223

notcub::CachingHostAllocator::cached_bytes
TotalBytes cached_bytes
Aggregate cached bytes.
Definition: CachingHostAllocator.h:229

value
Definition: value.py:1

notcub::CachingHostAllocator::max_bin_bytes
size_t max_bin_bytes
Maximum bin size.
Definition: CachingHostAllocator.h:222

electrons_cff.bool
bool
Definition: electrons_cff.py:381

notcub::CachingHostAllocator::BlockDescriptor
Definition: CachingHostAllocator.h:123

notcub::CachingHostAllocator::INVALID_BIN
static const unsigned int INVALID_BIN
Out-of-bounds bin.
Definition: CachingHostAllocator.h:106

notcub::CachingHostAllocator::BlockDescriptor::BlockDescriptor
BlockDescriptor()
Definition: CachingHostAllocator.h:141

notcub::CachingHostAllocator::BlockDescriptor::bin
unsigned int bin
Definition: CachingHostAllocator.h:126

notcub::CachingHostAllocator::min_bin
unsigned int min_bin
Minimum bin enumeration.
Definition: CachingHostAllocator.h:218

notcub::CachingHostAllocator::FreeAllCached
cudaError_t FreeAllCached()
Frees all cached pinned host allocations.
Definition: CachingHostAllocator.h:579

notcub::CachingHostAllocator::bin_growth
unsigned int bin_growth
Geometric growth factor for bin-sizes.
Definition: CachingHostAllocator.h:217

notcub::CachingHostAllocator::INVALID_DEVICE_ORDINAL
static const int INVALID_DEVICE_ORDINAL
Invalid device ordinal.
Definition: CachingHostAllocator.h:114

cudaCheck.h

b
double b
Definition: hdecay.h:118

notcub::CachingHostAllocator::~CachingHostAllocator
~CachingHostAllocator()
Destructor.
Definition: CachingHostAllocator.h:638

notcub::CachingHostAllocator::debug
bool debug
Whether or not to print (de)allocation events to stdout.
Definition: CachingHostAllocator.h:227

notcub::CachingHostAllocator::live_blocks
BusyBlocks live_blocks
Set of live pinned host allocations currently in use.
Definition: CachingHostAllocator.h:231

notcub::CachingHostAllocator::TotalBytes::live
size_t live
Definition: CachingHostAllocator.h:162

a
double a
Definition: hdecay.h:119

notcub::CachingHostAllocator::HostFree
cudaError_t HostFree(void *d_ptr)
Frees a live allocation of pinned host memory, returning it to the allocator.
Definition: CachingHostAllocator.h:497

notcub::CachingHostAllocator::skip_cleanup
const bool skip_cleanup
Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may hav...
Definition: CachingHostAllocator.h:226

notcub::CachingHostAllocator::HostAllocate
cudaError_t HostAllocate(void **d_ptr, size_t bytes, cudaStream_t active_stream=nullptr)
Provides a suitable allocation of pinned host memory for the given size.
Definition: CachingHostAllocator.h:312

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

JetChargeProducer_cfi.exp
exp
Definition: JetChargeProducer_cfi.py:6

newFWLiteAna.found
found
Definition: newFWLiteAna.py:118

notcub::CachingHostAllocator::TotalBytes
Definition: CachingHostAllocator.h:159

notcub::CachingHostAllocator
A simple caching allocator for pinned host memory allocations.
Definition: CachingHostAllocator.h:100

notcub::CachingHostAllocator::BusyBlocks
std::multiset< BlockDescriptor, Compare > BusyBlocks
Set type for live blocks (ordered by ptr)
Definition: CachingHostAllocator.h:170