// TritonMemResource.cc (CMSSW HeterogeneousCore/SonicTriton)
// NOTE(review): this file was recovered from a Doxygen HTML scrape; the page
// chrome that stood here replaced the original project #include lines
// (Doxygen source lines 1-5) -- restore them from the repository.
6 
// NOTE(review): the scrape dropped the project #includes (original lines 1-5);
// restore from the repository: TritonMemResource.h, TritonData.h, triton_utils.h
// (and the FWCore Exception / cudaCheck headers this file relies on).
#include <cstring>

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

namespace tc = triton::client;
13 
14 template <typename IO>
16  : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}
17 
18 template <typename IO>
20  triton_utils::throwIfError(data_->data_->SetSharedMemory(name_, data_->totalByteSize_, 0),
21  "unable to set shared memory (" + name_ + ")");
22 }
23 
24 template <typename IO>
26  : TritonMemResource<IO>(data, name, size) {}
27 
28 template <>
29 void TritonInputHeapResource::copyInput(const void* values, size_t offset) {
31  data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(values), data_->byteSizePerBatch_),
32  data_->name_ + " toServer(): unable to set data for batch entry " +
33  std::to_string(offset / data_->byteSizePerBatch_));
34 }
35 
36 template <>
37 const uint8_t* TritonOutputHeapResource::copyOutput() {
38  size_t contentByteSize;
39  const uint8_t* values;
40  triton_utils::throwIfError(data_->result_->RawData(data_->name_, &values, &contentByteSize),
41  data_->name_ + " fromServer(): unable to get raw");
42  if (contentByteSize != data_->totalByteSize_) {
43  throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
44  << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
45  }
46  return values;
47 }
48 
//shared memory helpers based on:
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/shm_utils.cc (cpu)
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_cudashm_client.cc (gpu)
52 
53 template <typename IO>
55  : TritonMemResource<IO>(data, name, size) {
56  //get shared memory region descriptor
57  int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
58  if (shm_fd == -1)
59  throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;
60 
61  //extend shared memory object
62  int res = ftruncate(shm_fd, this->size_);
63  if (res == -1)
64  throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
65  " to requested size: " + std::to_string(this->size_);
66 
67  //map to process address space
68  constexpr size_t offset(0);
69  this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
70  if (this->addr_ == MAP_FAILED)
71  throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
72  this->name_;
73 
74  //close descriptor
75  if (::close(shm_fd) == -1)
76  throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;
77 
78  triton_utils::throwIfError(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
79  "unable to register shared memory region: " + this->name_);
80 }
81 
82 template <typename IO>
83 TritonCpuShmResource<IO>::~TritonCpuShmResource<IO>() {
84  close();
85 }
86 
87 template <typename IO>
89  if (this->closed_)
90  return;
91 
92  triton_utils::throwIfError(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
93  "unable to unregister shared memory region: " + this->name_);
94 
95  //unmap
96  int tmp_fd = munmap(this->addr_, this->size_);
97  if (tmp_fd == -1)
98  throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;
99 
100  //unlink
101  int shm_fd = shm_unlink(this->name_.c_str());
102  if (shm_fd == -1)
103  throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;
104 
105  this->closed_ = true;
106 }
107 
108 template <>
109 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset) {
110  std::memcpy(addr_ + offset, values, data_->byteSizePerBatch_);
111 }
112 
113 template <>
115  return addr_;
116 }
117 
122 
123 #ifdef TRITON_ENABLE_GPU
124 template <typename IO>
125 TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
126  : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
127  //todo: get server device id somehow?
128  cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
129  cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
130  cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
132  this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
133  "unable to register CUDA shared memory region: " + this->name_);
134 }
135 
136 template <typename IO>
137 TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
138  close();
139 }
140 
141 template <typename IO>
142 void TritonGpuShmResource<IO>::close() {
143  if (this->closed_)
144  return;
145  triton_utils::throwIfError(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
146  "unable to unregister CUDA shared memory region: " + this->name_);
147  cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
148  this->closed_ = true;
149 }
150 
151 template <>
152 void TritonInputGpuShmResource::copyInput(const void* values, size_t offset) {
153  cudaCheck(
154  cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
155  data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) + " bytes to GPU");
156 }
157 
158 template <>
159 const uint8_t* TritonOutputGpuShmResource::copyOutput() {
160  //copy back from gpu, keep in scope
161  auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
162  cudaCheck(
163  cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
164  data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
165  data_->holder_ = ptr;
166  return ptr->data();
167 }
168 
169 template class TritonGpuShmResource<tc::InferInput>;
170 template class TritonGpuShmResource<tc::InferRequestedOutput>;
171 #endif
/* Doxygen hover-text residue from the HTML scrape (cross-reference signatures,
   not part of the source file proper):
   TritonData<IO>* data_
   void copyInput(const void* values, size_t offset) override
   void throwIfError(const Error& err, std::string_view msg)   // triton_utils.cc:21
   TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
   virtual void set()
   TritonMemResource(TritonData<IO>* data, const std::string& name, size_t size)
   const uint8_t* copyOutput() override
   TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size)
   char data[epos_bytes_allocation]                            // EPOS_Wrapper.h:79
   #define cudaCheck(ARG, ...)                                 // cudaCheck.h:69
*/