14 template <
typename IO>
18 template <
typename IO>
20 for (
auto&
entry : data_->entries_) {
22 "unable to set shared memory (" + name_ +
")");
26 template <
typename IO>
34 data_->name_ +
" toServer(): unable to set data for batch entry " +
43 size_t contentByteSize = 0;
45 size_t contentByteSizeEntry(0);
46 if (
entry.totalByteSize_ > 0)
48 data_->name_ +
" fromServer(): unable to get raw");
49 contentByteSize += contentByteSizeEntry;
51 if (contentByteSize !=
data_->totalByteSize_) {
52 throw cms::Exception(
"TritonDataError") <<
data_->name_ <<
" fromServer(): unexpected content byte size " 53 << contentByteSize <<
" (expected " <<
data_->totalByteSize_ <<
")";
61 template <
typename IO>
65 this->
size_ = std::max<size_t>(this->
size_, 1);
68 int shm_fd = shm_open(this->
name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
70 throw cms::Exception(
"TritonError") <<
"unable to get shared memory descriptor for key: " + this->
name_;
73 int res = ftruncate(shm_fd, this->size_);
75 throw cms::Exception(
"TritonError") <<
"unable to initialize shared memory key " + this->
name_ +
79 constexpr
size_t offset(0);
80 this->
addr_ = (uint8_t*)mmap(
nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd,
offset);
81 if (this->
addr_ == MAP_FAILED)
82 throw cms::Exception(
"TritonError") <<
"unable to map to process address space for shared memory key: " +
86 if (::
close(shm_fd) == -1)
87 throw cms::Exception(
"TritonError") <<
"unable to close descriptor for shared memory key: " + this->
name_;
90 "unable to register shared memory region: " + this->
name_);
93 template <
typename IO>
98 template <
typename IO>
104 "unable to unregister shared memory region: " + this->name_);
107 int tmp_fd = munmap(this->addr_, this->size_);
109 throw cms::Exception(
"TritonError") <<
"unable to munmap for shared memory key: " << this->name_;
112 int shm_fd = shm_unlink(this->name_.c_str());
114 throw cms::Exception(
"TritonError") <<
"unable to unlink for shared memory key: " << this->name_;
116 this->closed_ =
true;
137 #ifdef TRITON_ENABLE_GPU 138 template <
typename IO>
144 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->
addr_),
"unable to get IPC handle for key: " + this->
name_);
146 "unable to register CUDA shared memory region: " + this->
name_);
149 template <
typename IO>
150 TritonGpuShmResource<IO>::~TritonGpuShmResource() {
154 template <
typename IO>
155 void TritonGpuShmResource<IO>::close() {
159 "unable to unregister CUDA shared memory region: " + this->name_);
160 cudaCheck(cudaFree(this->addr_),
"unable to free GPU memory for key: " + this->name_);
161 this->closed_ =
true;
165 void TritonInputGpuShmResource::copyInput(
const void*
values,
size_t offset,
unsigned entry) {
167 data_->name_ +
" toServer(): unable to memcpy " +
std::to_string(data_->entries_[
entry].byteSizePerBatch_) +
172 void TritonOutputGpuShmResource::copyOutput() {
174 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
176 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
177 data_->name_ +
" fromServer(): unable to memcpy " +
std::to_string(data_->totalByteSize_) +
" bytes from GPU");
178 data_->holder_ = ptr;
179 for (
auto&
entry : data_->entries_) {
184 template class TritonGpuShmResource<tc::InferInput>;
185 template class TritonGpuShmResource<tc::InferRequestedOutput>;
void copyInput(const void *values, size_t offset, unsigned entry) override
#define TRITON_THROW_IF_ERROR(X, MSG)
static std::string to_string(const XMLCh *ch)
TritonCpuShmResource(TritonData< IO > *data, const std::string &name, size_t size)
TritonMemResource(TritonData< IO > *data, const std::string &name, size_t size)
void copyOutput() override
void copyInput(const void *values, size_t offset, unsigned entry) override
~TritonCpuShmResource() override
TritonHeapResource(TritonData< IO > *data, const std::string &name, size_t size)
char data[epos_bytes_allocation]
void copyOutput() override
#define cudaCheck(ARG,...)