14 template <
typename IO>
18 template <
typename IO>
20 for (
auto&
entry : data_->entries_) {
22 "unable to set shared memory (" + name_ +
")",
27 template <
typename IO>
40 template <
typename IO>
48 data_->name_ +
" toServer(): unable to set data for batch entry " +
58 size_t contentByteSize = 0;
60 size_t contentByteSizeEntry(0);
61 if (
entry.totalByteSize_ > 0)
63 data_->name_ +
" fromServer(): unable to get raw",
65 contentByteSize += contentByteSizeEntry;
67 if (contentByteSize !=
data_->totalByteSize_) {
68 throw cms::Exception(
"TritonDataError") <<
data_->name_ <<
" fromServer(): unexpected content byte size " 69 << contentByteSize <<
" (expected " <<
data_->totalByteSize_ <<
")";
77 template <
typename IO>
81 this->
size_ = std::max<size_t>(this->
size_, 1);
84 int shm_fd = shm_open(this->
name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
86 throw cms::Exception(
"TritonError") <<
"unable to get shared memory descriptor for key: " + this->
name_;
89 int res = ftruncate(shm_fd, this->size_);
91 throw cms::Exception(
"TritonError") <<
"unable to initialize shared memory key " + this->
name_ +
96 this->
addr_ = (uint8_t*)mmap(
nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd,
offset);
97 if (this->
addr_ == MAP_FAILED)
98 throw cms::Exception(
"TritonError") <<
"unable to map to process address space for shared memory key: " +
102 if (::
close(shm_fd) == -1)
103 throw cms::Exception(
"TritonError") <<
"unable to close descriptor for shared memory key: " + this->
name_;
106 "unable to register shared memory region: " + this->
name_,
110 template <
typename IO>
115 template <
typename IO>
121 "unable to unregister shared memory region: " + this->name_,
125 int tmp_fd = munmap(this->addr_, this->size_);
127 throw cms::Exception(
"TritonError") <<
"unable to munmap for shared memory key: " << this->name_;
130 int shm_fd = shm_unlink(this->name_.c_str());
132 throw cms::Exception(
"TritonError") <<
"unable to unlink for shared memory key: " << this->name_;
134 this->closed_ =
true;
155 #ifdef TRITON_ENABLE_GPU 156 template <
typename IO>
162 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->
addr_),
"unable to get IPC handle for key: " + this->
name_);
164 "unable to register CUDA shared memory region: " + this->
name_,
168 template <
typename IO>
169 TritonGpuShmResource<IO>::~TritonGpuShmResource() {
173 template <
typename IO>
174 void TritonGpuShmResource<IO>::close() {
178 "unable to unregister CUDA shared memory region: " + this->name_,
180 cudaCheck(cudaFree(this->addr_),
"unable to free GPU memory for key: " + this->name_);
181 this->closed_ =
true;
185 void TritonInputGpuShmResource::copyInput(
const void*
values,
size_t offset,
unsigned entry) {
187 data_->name_ +
" toServer(): unable to memcpy " +
std::to_string(data_->entries_[
entry].byteSizePerBatch_) +
192 void TritonOutputGpuShmResource::copyOutput() {
194 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
196 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
197 data_->name_ +
" fromServer(): unable to memcpy " +
std::to_string(data_->totalByteSize_) +
" bytes from GPU");
198 data_->holder_ = ptr;
199 for (
auto&
entry : data_->entries_) {
204 template class TritonGpuShmResource<tc::InferInput>;
205 template class TritonGpuShmResource<tc::InferRequestedOutput>;
void copyInput(const void *values, size_t offset, unsigned entry) override
static std::string to_string(const XMLCh *ch)
TritonCpuShmResource(TritonData< IO > *data, const std::string &name, size_t size)
TritonMemResource(TritonData< IO > *data, const std::string &name, size_t size)
void copyOutput() override
void convertToWarning(const cms::Exception &e)
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
void copyInput(const void *values, size_t offset, unsigned entry) override
~TritonCpuShmResource() override
TritonHeapResource(TritonData< IO > *data, const std::string &name, size_t size)
char data[epos_bytes_allocation]
void copyOutput() override
#define cudaCheck(ARG,...)
Log< level::Warning, false > LogWarning