// NOTE(review): extraction artifact — the bare numeric prefixes ("14", "16") are
// original source line numbers fused into the text, and the constructor signature
// line between the template header and the mem-initializer list is missing from
// this view. Visible behavior: store data/name/size, start with a null mapped
// address (addr_) and closed_ == false.
14 template <
typename IO>
16 : data_(data), name_(name), size_(size), addr_(nullptr), closed_(
false) {}
// NOTE(review): fragment only — the method signature and the call this error
// message belongs to (original lines 19-20) are missing from this view; only the
// failure-message argument naming the shared-memory region survives. Verify
// against the upstream file before editing.
18 template <
typename IO>
21 "unable to set shared memory (" + name_ +
")");
// NOTE(review): fragment of an input-copy method (signature on the missing
// original lines 25-30). Visible behavior: appends one batch entry of raw bytes
// (byteSizePerBatch_) via AppendRaw, and on failure reports the batch index
// computed as offset / byteSizePerBatch_.
24 template <
typename IO>
31 data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
data_->byteSizePerBatch_),
32 data_->name_ +
" toServer(): unable to set data for batch entry " +
33 std::to_string(offset /
data_->byteSizePerBatch_));
// NOTE(review): fragment of an output-fetch method; the call that fills
// contentByteSize (original lines 39-40) is missing from this view. Visible
// behavior: the byte count reported by the server is validated against the
// expected totalByteSize_, and a cms::Exception("TritonDataError") is thrown
// on mismatch with both actual and expected sizes in the message.
38 size_t contentByteSize;
41 data_->name_ +
" fromServer(): unable to get raw");
42 if (contentByteSize !=
data_->totalByteSize_) {
43 throw cms::Exception(
"TritonDataError") <<
data_->name_ <<
" fromServer(): unexpected content byte size "
44 << contentByteSize <<
" (expected " <<
data_->totalByteSize_ <<
")";
// NOTE(review): constructor fragment for the POSIX shared-memory resource; the
// signature (original lines 54-56) and the guard conditions for the throws
// (e.g. the `if (shm_fd == -1)` / `if (res == -1)` checks presumably on the
// missing lines 58 and 63) are not visible — confirm against upstream.
// Visible lifecycle:
//   1. shm_open with O_CREAT|O_EXCL (fails if the key already exists).
//   2. ftruncate the segment to size_.
//   3. mmap it read/write, MAP_SHARED, at offset 0.
//   4. ::close the descriptor (the mapping stays valid after close).
//   5. register the region with the server (only the error message survives
//      at original line 79; the registering call itself is missing).
53 template <
typename IO>
57 int shm_fd = shm_open(this->
name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
59 throw cms::Exception(
"TritonError") <<
"unable to get shared memory descriptor for key: " + this->
name_;
62 int res = ftruncate(shm_fd, this->
size_);
64 throw cms::Exception(
"TritonError") <<
"unable to initialize shared memory key " + this->
name_ +
65 " to requested size: " + std::to_string(this->
size_);
68 constexpr
size_t offset(0);
69 this->
addr_ = (uint8_t*)mmap(
nullptr, this->
size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
70 if (this->
addr_ == MAP_FAILED)
71 throw cms::Exception(
"TritonError") <<
"unable to map to process address space for shared memory key: " +
75 if (::
close(shm_fd) == -1)
76 throw cms::Exception(
"TritonError") <<
"unable to close descriptor for shared memory key: " + this->
name_;
79 "unable to register shared memory region: " + this->
name_);
// NOTE(review): two fragments — a template header at original line 82 whose
// definition (likely the destructor, lines 83-86) is missing, then the close()
// teardown. The guard conditions for the munmap/shm_unlink throws (original
// lines 97 and 102, presumably checks for -1) are also missing — verify
// upstream. Visible teardown order: unregister the region with the server,
// munmap the process mapping, shm_unlink the key, then mark closed_ = true.
82 template <
typename IO>
87 template <
typename IO>
93 "unable to unregister shared memory region: " + this->name_);
96 int tmp_fd = munmap(this->addr_, this->size_);
98 throw cms::Exception(
"TritonError") <<
"unable to munmap for shared memory key: " << this->name_;
101 int shm_fd = shm_unlink(this->name_.c_str());
103 throw cms::Exception(
"TritonError") <<
"unable to unlink for shared memory key: " << this->name_;
105 this->closed_ =
true;
// NOTE(review): body fragment only — the enclosing copyInput signature (original
// lines ~108-109) is missing from this view. Visible behavior: copy one batch
// entry (byteSizePerBatch_ bytes) from the caller's buffer into the mapped
// shared-memory region at the given byte offset.
110 std::memcpy(
addr_ + offset, values,
data_->byteSizePerBatch_);
123 #ifdef TRITON_ENABLE_GPU
// NOTE(review): GPU shared-memory constructor fragment — the signature (original
// line 125), the device allocation that fills this->addr_ (presumably a
// cudaMalloc on the missing lines 127/129), and the wrapper call that encloses
// RegisterCudaSharedMemory (line 131) are not visible; confirm upstream.
// Visible behavior: deviceId_ is hard-coded to 0, the device is selected with
// cudaSetDevice, a CUDA IPC handle for addr_ is obtained via
// cudaIpcGetMemHandle, and the region is registered with the client as CUDA
// shared memory under this->name_.
124 template <
typename IO>
126 :
TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
128 cudaCheck(cudaSetDevice(deviceId_),
"unable to set device ID to " + std::to_string(deviceId_));
130 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->
addr_),
"unable to get IPC handle for key: " + this->
name_);
132 this->
data_->client()->RegisterCudaSharedMemory(this->
name_, *handle_, deviceId_, this->
size_),
133 "unable to register CUDA shared memory region: " + this->
name_);
// NOTE(review): destructor body (original lines 138-140) is missing from this
// view — presumably it invokes close(), but that cannot be confirmed here.
136 template <
typename IO>
137 TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
// NOTE(review): close() fragment — the unregister call this error message
// belongs to (original lines 143-145) is missing. Visible teardown: unregister
// the CUDA shared-memory region with the server, cudaFree the device buffer,
// then mark closed_ = true.
141 template <
typename IO>
142 void TritonGpuShmResource<IO>::close() {
146 "unable to unregister CUDA shared memory region: " + this->name_);
147 cudaCheck(cudaFree(this->addr_),
"unable to free GPU memory for key: " + this->name_);
148 this->closed_ =
true;
// NOTE(review): the wrapper call that encloses this cudaMemcpy (original line
// 153, presumably cudaCheck() given the error-message second argument) is
// missing from this view. Visible behavior: copy one batch entry
// (byteSizePerBatch_ bytes) host-to-device into the GPU buffer at the given
// byte offset.
152 void TritonInputGpuShmResource::copyInput(
const void* values,
size_t offset) {
154 cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
155 data_->name_ +
" toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) +
" bytes to GPU");
// NOTE(review): fragment — the enclosing check call for the cudaMemcpy
// (original lines 162) and the return statement (line 166, presumably
// `return ptr->data();`) are missing from this view; confirm upstream.
// Visible behavior: allocate a host vector of totalByteSize_ bytes, copy the
// full device buffer device-to-host into it, and park the shared_ptr in
// data_->holder_ so the host copy outlives this call.
159 const uint8_t* TritonOutputGpuShmResource::copyOutput() {
161 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
163 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
164 data_->name_ +
" fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) +
" bytes from GPU");
165 data_->holder_ = ptr;
// Explicit instantiations of the GPU shared-memory resource for the two IO
// directions used by the Triton client: inference inputs and requested outputs.
// NOTE(review): the leading "169"/"170" are fused original line numbers from
// extraction, and the closing #endif for TRITON_ENABLE_GPU is outside this view.
169 template class TritonGpuShmResource<tc::InferInput>;
170 template class TritonGpuShmResource<tc::InferRequestedOutput>;
void copyInput(const void *values, size_t offset) override
void throwIfError(const Error &err, std::string_view msg)
TritonCpuShmResource(TritonData< IO > *data, const std::string &name, size_t size)
void copyInput(const void *values, size_t offset) override
TritonMemResource(TritonData< IO > *data, const std::string &name, size_t size)
const uint8_t * copyOutput() override
TritonHeapResource(TritonData< IO > *data, const std::string &name, size_t size)
char data[epos_bytes_allocation]
const uint8_t * copyOutput() override
#define cudaCheck(ARG,...)
tuple size
Write out results.