1 #ifndef HeterogenousCore_CUDAUtilities_src_CachingHostAllocator_h
2 #define HeterogenousCore_CUDAUtilities_src_CachingHostAllocator_h
100 struct CachingHostAllocator {
111 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
123 struct BlockDescriptor {
/// Container type for cached blocks: a std::multiset of BlockDescriptor
/// ordered by Compare, so several descriptors that compare equivalent may be
/// stored at once. Its iterator is the type used with
/// cached_blocks.lower_bound().
167 typedef std::multiset<BlockDescriptor, Compare>
CachedBlocks;
/// Container type for busy blocks. This is the same underlying type as
/// CachedBlocks; the separate alias only names the role. Its iterator is the
/// type used with live_blocks.find().
170 typedef std::multiset<BlockDescriptor, Compare>
BusyBlocks;
179 static unsigned int IntPow(
unsigned int base,
unsigned int exp) {
180 unsigned int retval = 1;
183 retval = retval *
base;
200 power =
sizeof(size_t) * 8;
201 rounded_bytes = size_t(0) - 1;
205 while (rounded_bytes <
value) {
206 rounded_bytes *=
base;
233 #endif // DOXYGEN_SHOULD_SKIP_THIS
297 printf(
"Changing max_cached_bytes (%lld -> %lld)\n",
298 (
long long)this->max_cached_bytes,
315 cudaStream_t active_stream =
nullptr)
319 cudaError_t
error = cudaSuccess;
326 search_key.device = device;
327 search_key.associated_stream = active_stream;
330 if (search_key.bin >
max_bin) {
335 search_key.bytes = bytes;
340 if (search_key.bin <
min_bin) {
347 CachedBlocks::iterator block_itr =
cached_blocks.lower_bound(search_key);
348 while ((block_itr !=
cached_blocks.end()) && (block_itr->bin == search_key.bin)) {
351 if (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady) {
354 search_key = *block_itr;
355 search_key.associated_stream = active_stream;
356 if (search_key.device != device) {
361 cudaCheck(
error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));
362 search_key.device = device;
373 "\tHost reused cached block at %p (%lld bytes) for stream %lld, event %lld on device %lld "
374 "(previously associated with stream %lld, event %lld).\n",
376 (
long long)search_key.bytes,
377 (
long long)search_key.associated_stream,
378 (
long long)search_key.ready_event,
379 (
long long)search_key.device,
380 (
long long)block_itr->associated_stream,
381 (
long long)block_itr->ready_event);
398 if ((
error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault)) ==
399 cudaErrorMemoryAllocation) {
403 "\tHost failed to allocate %lld bytes for stream %lld on device %lld, retrying after freeing cached "
405 (
long long)search_key.bytes,
406 (
long long)search_key.associated_stream,
407 (
long long)search_key.device);
424 if ((
error = cudaFreeHost(block_itr->d_ptr)))
426 if ((
error = cudaEventDestroy(block_itr->ready_event)))
434 "\tHost freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
435 "bytes) outstanding.\n",
436 (
long long)block_itr->bytes,
455 cudaCheck(
error = cudaHostAlloc(&search_key.d_ptr, search_key.bytes, cudaHostAllocDefault));
459 cudaCheck(
error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming));
469 "\tHost allocated new host block at %p (%lld bytes associated with stream %lld, event %lld on device "
472 (
long long)search_key.bytes,
473 (
long long)search_key.associated_stream,
474 (
long long)search_key.ready_event,
475 (
long long)search_key.device);
479 *d_ptr = search_key.d_ptr;
482 printf(
"\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
498 cudaError_t
error = cudaSuccess;
504 bool recached =
false;
505 BlockDescriptor search_key(d_ptr);
506 BusyBlocks::iterator block_itr =
live_blocks.find(search_key);
509 search_key = *block_itr;
522 "\tHost returned %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld "
523 "available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
524 (
long long)search_key.bytes,
525 (
long long)search_key.associated_stream,
526 (
long long)search_key.ready_event,
527 (
long long)search_key.device,
536 if (entrypoint_device != search_key.device) {
542 cudaCheck(
error = cudaEventRecord(search_key.ready_event, search_key.associated_stream));
555 "\tHost freed %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld available "
556 "blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
557 (
long long)search_key.bytes,
558 (
long long)search_key.associated_stream,
559 (
long long)search_key.ready_event,
560 (
long long)search_key.device,
579 cudaError_t
error = cudaSuccess;
591 if ((
error = cudaGetDevice(&entrypoint_device)))
596 if (
begin->device != current_device) {
599 current_device =
begin->device;
605 if ((
error = cudaEventDestroy(
begin->ready_event)))
613 "\tHost freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld "
614 "bytes) outstanding.\n",
615 (
long long)
begin->bytes,