14 template <
typename IO>
18 template <
typename IO>
21 "unable to set shared memory (" + name_ +
")");
24 template <
typename IO>
31 data_->name_ +
" toServer(): unable to set data for batch entry " +
37 size_t contentByteSize;
40 data_->name_ +
" fromServer(): unable to get raw");
41 if (contentByteSize !=
data_->totalByteSize_) {
42 throw cms::Exception(
"TritonDataError") <<
data_->name_ <<
" fromServer(): unexpected content byte size " 43 << contentByteSize <<
" (expected " <<
data_->totalByteSize_ <<
")";
52 template <
typename IO>
56 this->
size_ = std::max<size_t>(this->
size_, 1);
59 int shm_fd = shm_open(this->
name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
61 throw cms::Exception(
"TritonError") <<
"unable to get shared memory descriptor for key: " + this->
name_;
64 int res = ftruncate(shm_fd, this->size_);
66 throw cms::Exception(
"TritonError") <<
"unable to initialize shared memory key " + this->
name_ +
70 constexpr
size_t offset(0);
71 this->
addr_ = (uint8_t*)mmap(
nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd,
offset);
72 if (this->
addr_ == MAP_FAILED)
73 throw cms::Exception(
"TritonError") <<
"unable to map to process address space for shared memory key: " +
77 if (::
close(shm_fd) == -1)
78 throw cms::Exception(
"TritonError") <<
"unable to close descriptor for shared memory key: " + this->
name_;
81 "unable to register shared memory region: " + this->
name_);
84 template <
typename IO>
89 template <
typename IO>
95 "unable to unregister shared memory region: " + this->name_);
98 int tmp_fd = munmap(this->addr_, this->size_);
100 throw cms::Exception(
"TritonError") <<
"unable to munmap for shared memory key: " << this->name_;
103 int shm_fd = shm_unlink(this->name_.c_str());
105 throw cms::Exception(
"TritonError") <<
"unable to unlink for shared memory key: " << this->name_;
107 this->closed_ =
true;
126 #ifdef TRITON_ENABLE_GPU 127 template <
typename IO>
133 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->
addr_),
"unable to get IPC handle for key: " + this->
name_);
135 "unable to register CUDA shared memory region: " + this->
name_);
138 template <
typename IO>
139 TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
143 template <
typename IO>
144 void TritonGpuShmResource<IO>::close() {
148 "unable to unregister CUDA shared memory region: " + this->name_);
149 cudaCheck(cudaFree(this->addr_),
"unable to free GPU memory for key: " + this->name_);
150 this->closed_ =
true;
154 void TritonInputGpuShmResource::copyInput(
const void*
values,
size_t offset) {
156 cudaMemcpy(addr_ +
offset,
values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
157 data_->name_ +
" toServer(): unable to memcpy " +
std::to_string(data_->byteSizePerBatch_) +
" bytes to GPU");
161 const uint8_t* TritonOutputGpuShmResource::copyOutput() {
163 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
165 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
166 data_->name_ +
" fromServer(): unable to memcpy " +
std::to_string(data_->totalByteSize_) +
" bytes from GPU");
167 data_->holder_ = ptr;
171 template class TritonGpuShmResource<tc::InferInput>;
172 template class TritonGpuShmResource<tc::InferRequestedOutput>;
#define TRITON_THROW_IF_ERROR(X, MSG)
void copyInput(const void *values, size_t offset) override
std::string to_string(const V &value)
TritonCpuShmResource(TritonData< IO > *data, const std::string &name, size_t size)
void copyInput(const void *values, size_t offset) override
TritonMemResource(TritonData< IO > *data, const std::string &name, size_t size)
const uint8_t * copyOutput() override
TritonHeapResource(TritonData< IO > *data, const std::string &name, size_t size)
char data[epos_bytes_allocation]
const uint8_t * copyOutput() override
#define cudaCheck(ARG,...)