// NOTE(review): extraction artifact — the bare numeric prefixes ("14", "16") are
// original source line numbers fused into the text, and the constructor signature
// line between the template header and the mem-initializer list is missing from
// this view. Visible behavior: store data/name/size, start with a null mapped
// address (addr_) and closed_ == false.
14 template <
typename IO>
16 : data_(data), name_(name), size_(size), addr_(nullptr), closed_(
false) {}
// NOTE(review): fragment only — the method signature and the call this error
// message belongs to (original lines 19-20) are missing from this view; only the
// failure-message argument naming the shared-memory region survives. Verify
// against the upstream file before editing.
18 template <
typename IO>
21 "unable to set shared memory (" + name_ +
")");
// NOTE(review): fragment of an input-copy method (signature on the missing
// original lines 25-30). Visible behavior: appends one batch entry of raw bytes
// (byteSizePerBatch_) via AppendRaw, and on failure reports the batch index
// computed as offset / byteSizePerBatch_.
24 template <
typename IO>
31 data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
data_->byteSizePerBatch_),
32 data_->name_ +
" toServer(): unable to set data for batch entry " +
33 std::to_string(offset /
data_->byteSizePerBatch_));
// NOTE(review): fragment of an output-fetch method; the call that fills
// contentByteSize (original lines 39-40) is missing from this view. Visible
// behavior: the byte count reported by the server is validated against the
// expected totalByteSize_, and a cms::Exception("TritonDataError") is thrown
// on mismatch with both actual and expected sizes in the message.
38 size_t contentByteSize;
41 data_->name_ +
" fromServer(): unable to get raw");
42 if (contentByteSize !=
data_->totalByteSize_) {
43 throw cms::Exception(
"TritonDataError") <<
data_->name_ <<
" fromServer(): unexpected content byte size "
44 << contentByteSize <<
" (expected " <<
data_->totalByteSize_ <<
")";
// NOTE(review): constructor fragment for the POSIX shared-memory resource; the
// signature (original lines 54-56) and the guard conditions for the throws
// (e.g. the `if (shm_fd == -1)` / `if (res == -1)` checks presumably on the
// missing lines 58 and 63) are not visible — confirm against upstream.
// Visible lifecycle:
//   1. shm_open with O_CREAT|O_EXCL (fails if the key already exists).
//   2. ftruncate the segment to size_.
//   3. mmap it read/write, MAP_SHARED, at offset 0.
//   4. ::close the descriptor (the mapping stays valid after close).
//   5. register the region with the server (only the error message survives
//      at original line 79; the registering call itself is missing).
53 template <
typename IO>
57 int shm_fd = shm_open(this->
name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
59 throw cms::Exception(
"TritonError") <<
"unable to get shared memory descriptor for key: " + this->
name_;
62 int res = ftruncate(shm_fd, this->
size_);
64 throw cms::Exception(
"TritonError") <<
"unable to initialize shared memory key " + this->
name_ +
65 " to requested size: " + std::to_string(this->
size_);
68 constexpr
size_t offset(0);
69 this->
addr_ = (uint8_t*)mmap(
nullptr, this->
size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
70 if (this->
addr_ == MAP_FAILED)
71 throw cms::Exception(
"TritonError") <<
"unable to map to process address space for shared memory key: " +
75 if (::
close(shm_fd) == -1)
76 throw cms::Exception(
"TritonError") <<
"unable to close descriptor for shared memory key: " + this->
name_;
79 "unable to register shared memory region: " + this->
name_);
// NOTE(review): two fragments — a template header at original line 82 whose
// definition (likely the destructor, lines 83-86) is missing, then the close()
// teardown. The guard conditions for the munmap/shm_unlink throws (original
// lines 97 and 102, presumably checks for -1) are also missing — verify
// upstream. Visible teardown order: unregister the region with the server,
// munmap the process mapping, shm_unlink the key, then mark closed_ = true.
82 template <
typename IO>
87 template <
typename IO>
93 "unable to unregister shared memory region: " + this->name_);
96 int tmp_fd = munmap(this->addr_, this->size_);
98 throw cms::Exception(
"TritonError") <<
"unable to munmap for shared memory key: " << this->name_;
101 int shm_fd = shm_unlink(this->name_.c_str());
103 throw cms::Exception(
"TritonError") <<
"unable to unlink for shared memory key: " << this->name_;
105 this->closed_ =
true;
// NOTE(review): body fragment only — the enclosing copyInput signature (original
// lines ~108-109) is missing from this view. Visible behavior: copy one batch
// entry (byteSizePerBatch_ bytes) from the caller's buffer into the mapped
// shared-memory region at the given byte offset.
110 std::memcpy(
addr_ + offset, values,
data_->byteSizePerBatch_);
123 #ifdef TRITON_ENABLE_GPU
// NOTE(review): GPU shared-memory constructor fragment — the signature (original
// line 125), the device allocation that fills this->addr_ (presumably a
// cudaMalloc on the missing lines 127/129), and the wrapper call that encloses
// RegisterCudaSharedMemory (line 131) are not visible; confirm upstream.
// Visible behavior: deviceId_ is hard-coded to 0, the device is selected with
// cudaSetDevice, a CUDA IPC handle for addr_ is obtained via
// cudaIpcGetMemHandle, and the region is registered with the client as CUDA
// shared memory under this->name_.
124 template <
typename IO>
126 :
TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
128 cudaCheck(cudaSetDevice(deviceId_),
"unable to set device ID to " + std::to_string(deviceId_));
130 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->
addr_),
"unable to get IPC handle for key: " + this->
name_);
132 this->
data_->client()->RegisterCudaSharedMemory(this->
name_, *handle_, deviceId_, this->
size_),
133 "unable to register CUDA shared memory region: " + this->
name_);
// NOTE(review): destructor body (original lines 138-140) is missing from this
// view — presumably it invokes close(), but that cannot be confirmed here.
136 template <
typename IO>
137 TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
// NOTE(review): close() fragment — the unregister call this error message
// belongs to (original lines 143-145) is missing. Visible teardown: unregister
// the CUDA shared-memory region with the server, cudaFree the device buffer,
// then mark closed_ = true.
141 template <
typename IO>
142 void TritonGpuShmResource<IO>::close() {
146 "unable to unregister CUDA shared memory region: " + this->name_);
147 cudaCheck(cudaFree(this->addr_),
"unable to free GPU memory for key: " + this->name_);
148 this->closed_ =
true;
// NOTE(review): the wrapper call that encloses this cudaMemcpy (original line
// 153, presumably cudaCheck() given the error-message second argument) is
// missing from this view. Visible behavior: copy one batch entry
// (byteSizePerBatch_ bytes) host-to-device into the GPU buffer at the given
// byte offset.
152 void TritonInputGpuShmResource::copyInput(
const void* values,
size_t offset) {
154 cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
155 data_->name_ +
" toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) +
" bytes to GPU");
// NOTE(review): fragment — the enclosing check call for the cudaMemcpy
// (original lines 162) and the return statement (line 166, presumably
// `return ptr->data();`) are missing from this view; confirm upstream.
// Visible behavior: allocate a host vector of totalByteSize_ bytes, copy the
// full device buffer device-to-host into it, and park the shared_ptr in
// data_->holder_ so the host copy outlives this call.
159 const uint8_t* TritonOutputGpuShmResource::copyOutput() {
161 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
163 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
164 data_->name_ +
" fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) +
" bytes from GPU");
165 data_->holder_ = ptr;
// Explicit instantiations of the GPU shared-memory resource for the two IO
// directions used by the Triton client: inference inputs and requested outputs.
// NOTE(review): the leading "169"/"170" are fused original line numbers from
// extraction, and the closing #endif for TRITON_ENABLE_GPU is outside this view.
169 template class TritonGpuShmResource<tc::InferInput>;
170 template class TritonGpuShmResource<tc::InferRequestedOutput>;
void copyInput(const void *values, size_t offset) override
void throwIfError(const Error &err, std::string_view msg)
TritonCpuShmResource(TritonData< IO > *data, const std::string &name, size_t size)
void copyInput(const void *values, size_t offset) override
TritonMemResource(TritonData< IO > *data, const std::string &name, size_t size)
const uint8_t * copyOutput() override
TritonHeapResource(TritonData< IO > *data, const std::string &name, size_t size)
char data[epos_bytes_allocation]
const uint8_t * copyOutput() override
#define cudaCheck(ARG,...)
tuple size
Write out results.