CMS 3D CMS Logo

TritonMemResource.cc
Go to the documentation of this file.
6 
7 #include <cstring>
8 #include <fcntl.h>
9 #include <sys/mman.h>
10 #include <unistd.h>
11 
12 namespace tc = triton::client;
13 
14 template <typename IO>
16  : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}
17 
18 template <typename IO>
20  for (auto& entry : data_->entries_) {
21  TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),
22  "unable to set shared memory (" + name_ + ")");
23  }
24 }
25 
26 template <typename IO>
28  : TritonMemResource<IO>(data, name, size) {}
29 
30 template <>
31 void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
32  TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
33  data_->entries_[entry].byteSizePerBatch_),
34  data_->name_ + " toServer(): unable to set data for batch entry " +
35  (data_->entries_.size() > 1 ? std::to_string(entry)
36  : data_->entries_[entry].byteSizePerBatch_
37  ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_)
38  : ""));
39 }
40 
41 template <>
43  size_t contentByteSize = 0;
44  for (auto& entry : data_->entries_) {
45  size_t contentByteSizeEntry(0);
46  if (entry.totalByteSize_ > 0)
47  TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
48  data_->name_ + " fromServer(): unable to get raw");
49  contentByteSize += contentByteSizeEntry;
50  }
51  if (contentByteSize != data_->totalByteSize_) {
52  throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
53  << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
54  }
55 }
56 
57 //shared memory helpers based on:
58 // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/shm_utils.cc (cpu)
59 // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_cudashm_client.cc (gpu)
60 
61 template <typename IO>
63  : TritonMemResource<IO>(data, name, size), sizeOrig_(size) {
64  //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton
65  this->size_ = std::max<size_t>(this->size_, 1);
66 
67  //get shared memory region descriptor
68  int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
69  if (shm_fd == -1)
70  throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;
71 
72  //extend shared memory object
73  int res = ftruncate(shm_fd, this->size_);
74  if (res == -1)
75  throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
76  " to requested size: " + std::to_string(this->size_);
77 
78  //map to process address space
79  constexpr size_t offset(0);
80  this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
81  if (this->addr_ == MAP_FAILED)
82  throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
83  this->name_;
84 
85  //close descriptor
86  if (::close(shm_fd) == -1)
87  throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;
88 
89  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
90  "unable to register shared memory region: " + this->name_);
91 }
92 
93 template <typename IO>
95  close();
96 }
97 
98 template <typename IO>
100  if (this->closed_)
101  return;
102 
103  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
104  "unable to unregister shared memory region: " + this->name_);
105 
106  //unmap
107  int tmp_fd = munmap(this->addr_, this->size_);
108  if (tmp_fd == -1)
109  throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;
110 
111  //unlink
112  int shm_fd = shm_unlink(this->name_.c_str());
113  if (shm_fd == -1)
114  throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;
115 
116  this->closed_ = true;
117 }
118 
119 template <>
120 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
121  if (sizeOrig_ > 0)
122  std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
123 }
124 
125 template <>
127  for (auto& entry : data_->entries_) {
128  entry.output_ = addr_ + entry.offset_;
129  }
130 }
131 
136 
137 #ifdef TRITON_ENABLE_GPU
138 template <typename IO>
139 TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
140  : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
141  //todo: get server device id somehow?
142  cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
143  cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
144  cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
145  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
146  "unable to register CUDA shared memory region: " + this->name_);
147 }
148 
149 template <typename IO>
150 TritonGpuShmResource<IO>::~TritonGpuShmResource() {
151  close();
152 }
153 
154 template <typename IO>
155 void TritonGpuShmResource<IO>::close() {
156  if (this->closed_)
157  return;
158  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
159  "unable to unregister CUDA shared memory region: " + this->name_);
160  cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
161  this->closed_ = true;
162 }
163 
164 template <>
165 void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
166  cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
167  data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) +
168  " bytes to GPU");
169 }
170 
171 template <>
172 void TritonOutputGpuShmResource::copyOutput() {
173  //copy back from gpu, keep in scope
174  auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
175  cudaCheck(
176  cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
177  data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
178  data_->holder_ = ptr;
179  for (auto& entry : data_->entries_) {
180  entry.output_ = ptr->data() + entry.offset_;
181  }
182 }
183 
184 template class TritonGpuShmResource<tc::InferInput>;
185 template class TritonGpuShmResource<tc::InferRequestedOutput>;
186 #endif
size
Write out results.
void copyInput(const void *values, size_t offset, unsigned entry) override
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
TritonData< IO > * data_
Definition: Electron.h:6
static std::string to_string(const XMLCh *ch)
TritonCpuShmResource(TritonData< IO > *data, const std::string &name, size_t size)
virtual void set()
TritonMemResource(TritonData< IO > *data, const std::string &name, size_t size)
void copyOutput() override
void copyInput(const void *values, size_t offset, unsigned entry) override
~TritonCpuShmResource() override
TritonHeapResource(TritonData< IO > *data, const std::string &name, size_t size)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
void copyOutput() override
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69