// NOTE(review): this file is a line-wrapped / sampled fragment; the leading
// integers (14, 18, 21, ...) appear to be original source line numbers fused
// into the text during extraction. All code tokens are preserved byte-identical.
// Presumably these are the template headers of TritonMemResource<IO> members
// (ctor and a set()-like method) -- the signatures are not visible here; confirm
// against the original file.
14 template <
typename IO>
18 template <
typename IO>
// Error-message fragment, presumably the second argument of a
// THROW_IF_ERROR-style macro guarding a SetSharedMemory call on the
// underlying Triton client object -- the call itself is not visible.
21 "unable to set shared memory (" + name_ +
")");
24 template <
typename IO>
// NOTE(review): fragment of an input-copy routine (toServer path). It appends
// one batch entry's raw bytes (byteSizePerBatch_) to the wrapped IO object via
// AppendRaw(); the enclosing function signature and the error-checking macro
// that wraps this call are not visible in this fragment -- TODO confirm.
31 data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(
values),
data_->byteSizePerBatch_),
// Error-message fragment for the AppendRaw failure path; the batch-entry
// index that presumably follows the trailing '+' is missing from this view.
32 data_->name_ +
" toServer(): unable to set data for batch entry " +
// NOTE(review): fragment of an output-copy routine (fromServer path). It
// retrieves raw result bytes (the retrieving call at original line ~39-40 is
// missing from this view), then validates the reported size against the
// expected total byte size, throwing cms::Exception on mismatch.
38 size_t contentByteSize;
41 data_->name_ +
" fromServer(): unable to get raw");
// Size sanity check: the server-reported byte count must equal the byte count
// this data object expects (totalByteSize_); otherwise the result is rejected.
42 if (contentByteSize !=
data_->totalByteSize_) {
43 throw cms::Exception(
"TritonDataError") <<
data_->name_ <<
" fromServer(): unexpected content byte size "
44 << contentByteSize <<
" (expected " <<
data_->totalByteSize_ <<
")";
53 template <
typename IO>
// NOTE(review): fragment of the CPU shared-memory resource setup (presumably
// the TritonCpuShmResource<IO> constructor). Sequence visible here:
//   shm_open -> ftruncate -> mmap -> close(fd) -> register with the server.
// Each step throws cms::Exception("TritonError") on failure. The `if` lines
// guarding each throw are missing from this sampled view -- confirm against
// the original file.
// Create a brand-new POSIX shared-memory object (O_CREAT | O_EXCL fails if the
// name already exists), readable/writable by the owner only.
57 int shm_fd = shm_open(this->
name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
59 throw cms::Exception(
"TritonError") <<
"unable to get shared memory descriptor for key: " + this->
name_;
// Size the shared-memory object to the requested byte count.
62 int res = ftruncate(shm_fd, this->
size_);
64 throw cms::Exception(
"TritonError") <<
"unable to initialize shared memory key " + this->
name_ +
65 " to requested size: " + std::to_string(this->
size_);
// Map the whole region (offset 0) into this process's address space.
68 constexpr
size_t offset(0);
69 this->
addr_ = (uint8_t*)mmap(
nullptr, this->
size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd,
offset);
70 if (this->
addr_ == MAP_FAILED)
71 throw cms::Exception(
"TritonError") <<
"unable to map to process address space for shared memory key: " +
// The descriptor is no longer needed once the mapping exists; POSIX keeps the
// mapping valid after close().
75 if (::
close(shm_fd) == -1)
76 throw cms::Exception(
"TritonError") <<
"unable to close descriptor for shared memory key: " + this->
name_;
// Error-message fragment: presumably the second argument of a macro wrapping
// the client's RegisterSystemSharedMemory call -- the call is not visible here.
79 "unable to register shared memory region: " + this->
name_);
82 template <
typename IO>
// NOTE(review): original line 82 presumably introduces the destructor (which
// likely delegates to close()); its body is not visible in this fragment.
87 template <
typename IO>
// Teardown fragment for the CPU shared-memory resource: unregister from the
// server, munmap the local mapping, shm_unlink the POSIX object, then mark
// the resource closed. The guarding `if` lines and the unregister call itself
// are missing from this sampled view -- confirm against the original file.
93 "unable to unregister shared memory region: " + this->name_);
// NOTE(review): munmap returns a status code, not a descriptor -- the name
// `tmp_fd` is misleading; consider renaming in the original file.
96 int tmp_fd = munmap(this->addr_, this->size_);
98 throw cms::Exception(
"TritonError") <<
"unable to munmap for shared memory key: " << this->name_;
// Remove the named shared-memory object so it does not outlive the process.
101 int shm_fd = shm_unlink(this->name_.c_str());
103 throw cms::Exception(
"TritonError") <<
"unable to unlink for shared memory key: " << this->name_;
// Idempotence flag: presumably checked at the top of close()/the destructor.
105 this->closed_ =
true;
// GPU shared-memory support is compiled only when the Triton client was built
// with GPU support.
123 #ifdef TRITON_ENABLE_GPU
124 template <
typename IO>
// NOTE(review): fragment of the GPU shared-memory resource setup (presumably
// the TritonGpuShmResource<IO> constructor). Visible sequence: select the CUDA
// device, obtain a CUDA IPC handle for the device allocation at addr_ (the
// cudaMalloc itself is not visible here), and register the handle with the
// server. Each cudaCheck presumably throws on failure -- confirm.
128 cudaCheck(cudaSetDevice(deviceId_),
"unable to set device ID to " + std::to_string(deviceId_));
130 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->
addr_),
"unable to get IPC handle for key: " + this->
name_);
// Register the CUDA shared-memory region with the Triton server; the macro
// wrapping this call is missing from this sampled view.
132 this->
data_->client()->RegisterCudaSharedMemory(this->
name_, *handle_, deviceId_, this->
size_),
133 "unable to register CUDA shared memory region: " + this->
name_);
136 template <
typename IO>
// NOTE(review): spelling the destructor as ~TritonGpuShmResource<IO>() (with
// template arguments on the dtor name) is deprecated in newer C++ standards;
// plain ~TritonGpuShmResource() is preferred. Body presumably delegates to
// close() -- not visible in this fragment.
137 TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
141 template <
typename IO>
// Teardown for the GPU resource: unregister the CUDA region from the server
// (the unregister call itself is missing from this view), free the device
// allocation, and mark the resource closed.
142 void TritonGpuShmResource<IO>::close() {
146 "unable to unregister CUDA shared memory region: " + this->name_);
147 cudaCheck(cudaFree(this->addr_),
"unable to free GPU memory for key: " + this->name_);
// Idempotence flag: presumably checked before re-running teardown.
148 this->closed_ =
true;
// Copies one batch entry's worth of host-side input bytes into the GPU
// shared-memory region at the given byte offset. The cudaCheck/macro wrapper
// around the cudaMemcpy (original line ~153) is missing from this sampled
// view -- confirm against the original file.
152 void TritonInputGpuShmResource::copyInput(
const void*
values,
size_t offset) {
// Host -> device copy of byteSizePerBatch_ bytes into addr_ + offset.
154 cudaMemcpy(addr_ +
offset,
values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
155 data_->name_ +
" toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) +
" bytes to GPU");
// Copies the full result (totalByteSize_ bytes) from the GPU shared-memory
// region into a host-side buffer and stashes the buffer in data_->holder_ so
// it outlives this call; presumably ptr->data() is returned afterwards (the
// return statement is missing from this sampled view).
159 const uint8_t* TritonOutputGpuShmResource::copyOutput() {
// Heap-allocated host buffer sized to the expected output byte count.
161 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
// Device -> host copy; the cudaCheck/macro wrapper around this call is not
// visible in this fragment.
163 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
164 data_->name_ +
" fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) +
" bytes from GPU");
// Keep the buffer alive on the data object so the returned raw pointer
// remains valid for the caller.
165 data_->holder_ = ptr;
// Explicit instantiations: compile the GPU shared-memory resource for both
// the input and requested-output IO types of the Triton client library, so
// the template definitions in this translation unit are emitted here.
169 template class TritonGpuShmResource<tc::InferInput>;
170 template class TritonGpuShmResource<tc::InferRequestedOutput>;