TritonMemResource.cc
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/Utilities/interface/Exception.h"

#include <cstring>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

namespace tc = triton::client;

template <typename IO>
TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string& name, size_t size)
    : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}

template <typename IO>
void TritonMemResource<IO>::set() {
  for (auto& entry : data_->entries_) {
    TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),
                          "unable to set shared memory (" + name_ + ")",
                          true);
  }
}

template <typename IO>
void TritonMemResource<IO>::closeSafe() {
  CMS_SA_ALLOW try { close(); } catch (TritonException& e) {
    e.convertToWarning();
  } catch (cms::Exception& e) {
    triton_utils::convertToWarning(e);
  } catch (std::exception& e) {
    edm::LogWarning("UnknownFailure") << e.what();
  } catch (...) {
    edm::LogWarning("UnknownFailure") << "An unknown exception was thrown";
  }
}

template <typename IO>
TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size) {}

template <>
void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
  TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
                                                                data_->entries_[entry].byteSizePerBatch_),
                        data_->name_ + " toServer(): unable to set data for batch entry " +
                            (data_->entries_.size() > 1 ? std::to_string(entry)
                             : data_->entries_[entry].byteSizePerBatch_
                                 ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_)
                                 : ""),
                        false);
}

template <>
void TritonOutputHeapResource::copyOutput() {
  size_t contentByteSize = 0;
  for (auto& entry : data_->entries_) {
    size_t contentByteSizeEntry(0);
    if (entry.totalByteSize_ > 0)
      TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
                            data_->name_ + " fromServer(): unable to get raw",
                            false);
    contentByteSize += contentByteSizeEntry;
  }
  if (contentByteSize != data_->totalByteSize_) {
    throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
                                            << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
  }
}

//shared memory helpers based on:
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/shm_utils.cc (cpu)
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_cudashm_client.cc (gpu)

template <typename IO>
TritonCpuShmResource<IO>::TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size), sizeOrig_(size) {
  //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton
  this->size_ = std::max<size_t>(this->size_, 1);

  //get shared memory region descriptor
  int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
  if (shm_fd == -1)
    throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;

  //extend shared memory object
  int res = ftruncate(shm_fd, this->size_);
  if (res == -1)
    throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
                                               " to requested size: " + std::to_string(this->size_);

  //map to process address space
  constexpr size_t offset(0);
  this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
  if (this->addr_ == MAP_FAILED)
    throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
                                               this->name_;

  //close descriptor
  if (::close(shm_fd) == -1)
    throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;

  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
                        "unable to register shared memory region: " + this->name_,
                        true);
}

template <typename IO>
TritonCpuShmResource<IO>::~TritonCpuShmResource() {
  this->closeSafe();
}

template <typename IO>
void TritonCpuShmResource<IO>::close() {
  if (this->closed_)
    return;

  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
                        "unable to unregister shared memory region: " + this->name_,
                        true);

  //unmap
  int tmp_fd = munmap(this->addr_, this->size_);
  if (tmp_fd == -1)
    throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;

  //unlink
  int shm_fd = shm_unlink(this->name_.c_str());
  if (shm_fd == -1)
    throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;

  this->closed_ = true;
}

template <>
void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
  if (sizeOrig_ > 0)
    std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
}

template <>
void TritonOutputCpuShmResource::copyOutput() {
  for (auto& entry : data_->entries_) {
    entry.output_ = addr_ + entry.offset_;
  }
}

template class TritonHeapResource<tc::InferInput>;
template class TritonCpuShmResource<tc::InferInput>;
template class TritonHeapResource<tc::InferRequestedOutput>;
template class TritonCpuShmResource<tc::InferRequestedOutput>;

#ifdef TRITON_ENABLE_GPU
template <typename IO>
TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
  //todo: get server device id somehow?
  cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
  cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
  cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
                        "unable to register CUDA shared memory region: " + this->name_,
                        true);
}

template <typename IO>
TritonGpuShmResource<IO>::~TritonGpuShmResource() {
  this->closeSafe();
}

template <typename IO>
void TritonGpuShmResource<IO>::close() {
  if (this->closed_)
    return;
  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
                        "unable to unregister CUDA shared memory region: " + this->name_,
                        true);
  cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
  this->closed_ = true;
}

template <>
void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
  cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
            data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) +
                " bytes to GPU");
}

template <>
void TritonOutputGpuShmResource::copyOutput() {
  //copy back from gpu, keep in scope
  auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
  cudaCheck(
      cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
      data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
  data_->holder_ = ptr;
  for (auto& entry : data_->entries_) {
    entry.output_ = ptr->data() + entry.offset_;
  }
}

template class TritonGpuShmResource<tc::InferInput>;
template class TritonGpuShmResource<tc::InferRequestedOutput>;
#endif
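
For reference, the CPU path above is the standard POSIX system shared-memory sequence from the shm_utils.cc example linked in the comments. A minimal, self-contained sketch of that lifecycle outside CMSSW is shown below; the region name and size are invented for illustration, and error handling is reduced to perror.

//Minimal sketch of the POSIX shared-memory lifecycle used by TritonCpuShmResource (illustrative only).
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  const char* name = "/triton_shm_example";  //hypothetical key; the server-side registration would use the same name
  size_t size = 1024;
  size = std::max<size_t>(size, 1);  //mmap of size zero is required to fail by POSIX, so keep at least 1 byte

  //create and size the region
  int fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
  if (fd == -1) { perror("shm_open"); return 1; }
  if (ftruncate(fd, size) == -1) { perror("ftruncate"); return 1; }

  //map it into this process; the Triton server maps the same region after RegisterSystemSharedMemory(name, name, size)
  void* addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (addr == MAP_FAILED) { perror("mmap"); return 1; }
  close(fd);  //descriptor no longer needed once mapped

  //write input bytes directly into the shared region (what copyInput() does via memcpy)
  std::memset(addr, 0, size);

  //teardown mirrors TritonCpuShmResource::close(): unmap, then unlink the name
  if (munmap(addr, size) == -1) { perror("munmap"); return 1; }
  if (shm_unlink(name) == -1) { perror("shm_unlink"); return 1; }
  return 0;
}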
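
Similarly, the GPU path relies on CUDA IPC memory handles: the client allocates device memory, exports a cudaIpcMemHandle_t, and hands it to the server via RegisterCudaSharedMemory; the server, in a separate process, opens the same allocation with cudaIpcOpenMemHandle. A minimal producer-side sketch (standalone host C++ using the CUDA runtime, not CMSSW code; the buffer size is an assumption) follows.

//Minimal producer-side sketch of the CUDA IPC handle export done in TritonGpuShmResource's constructor.
#include <cstdint>
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>

#define CHECK(x)                                                   \
  do {                                                             \
    cudaError_t err = (x);                                         \
    if (err != cudaSuccess) {                                      \
      std::printf("%s failed: %s\n", #x, cudaGetErrorString(err)); \
      return 1;                                                    \
    }                                                              \
  } while (0)

int main() {
  const size_t size = 1024;  //hypothetical buffer size
  CHECK(cudaSetDevice(0));   //same role as deviceId_ above

  //allocate device memory and export an IPC handle for it
  void* addr = nullptr;
  CHECK(cudaMalloc(&addr, size));
  cudaIpcMemHandle_t handle;
  CHECK(cudaIpcGetMemHandle(&handle, addr));
  //the handle bytes would be sent to another process (here, via RegisterCudaSharedMemory),
  //which opens the same allocation with cudaIpcOpenMemHandle()

  //stage host data into the shared allocation (what copyInput() does)
  std::vector<uint8_t> host(size, 0);
  CHECK(cudaMemcpy(addr, host.data(), size, cudaMemcpyHostToDevice));

  //teardown mirrors TritonGpuShmResource::close()
  CHECK(cudaFree(addr));
  return 0;
}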