// TritonMemResource.cc
// Memory-resource helpers (heap / POSIX shared memory / CUDA IPC shared memory)
// used by the SonicTriton client to exchange tensor data with a Triton server.
// NOTE(review): the project includes were lost in the HTML scrape; the names used
// below (TritonData, TRITON_THROW_IF_ERROR, cms::Exception, cudaCheck, and the
// TritonMemResource class hierarchy) require headers like the ones reconstructed
// here — confirm exact paths against the repository.
#include "FWCore/Utilities/interface/Exception.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"

#include <cstring>

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

namespace tc = triton::client;

14 template <typename IO>
16  : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}
17 
18 template <typename IO>
20  TRITON_THROW_IF_ERROR(data_->data_->SetSharedMemory(name_, data_->totalByteSize_, 0),
21  "unable to set shared memory (" + name_ + ")");
22 }
23 
24 template <typename IO>
26  : TritonMemResource<IO>(data, name, size) {}
27 
28 template <>
29 void TritonInputHeapResource::copyInput(const void* values, size_t offset) {
30  TRITON_THROW_IF_ERROR(data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(values), data_->byteSizePerBatch_),
31  data_->name_ + " toServer(): unable to set data for batch entry " +
32  (data_->byteSizePerBatch_ ? std::to_string(offset / data_->byteSizePerBatch_) : ""));
33 }
34 
35 template <>
36 const uint8_t* TritonOutputHeapResource::copyOutput() {
37  size_t contentByteSize;
38  const uint8_t* values;
39  TRITON_THROW_IF_ERROR(data_->result_->RawData(data_->name_, &values, &contentByteSize),
40  data_->name_ + " fromServer(): unable to get raw");
41  if (contentByteSize != data_->totalByteSize_) {
42  throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
43  << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
44  }
45  return values;
46 }
47 
//shared memory helpers based on:
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/shm_utils.cc (cpu)
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_cudashm_client.cc (gpu)

52 template <typename IO>
54  : TritonMemResource<IO>(data, name, size) {
55  //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton
56  this->size_ = std::max<size_t>(this->size_, 1);
57 
58  //get shared memory region descriptor
59  int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
60  if (shm_fd == -1)
61  throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;
62 
63  //extend shared memory object
64  int res = ftruncate(shm_fd, this->size_);
65  if (res == -1)
66  throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
67  " to requested size: " + std::to_string(this->size_);
68 
69  //map to process address space
70  constexpr size_t offset(0);
71  this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
72  if (this->addr_ == MAP_FAILED)
73  throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
74  this->name_;
75 
76  //close descriptor
77  if (::close(shm_fd) == -1)
78  throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;
79 
80  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
81  "unable to register shared memory region: " + this->name_);
82 }
83 
84 template <typename IO>
85 TritonCpuShmResource<IO>::~TritonCpuShmResource<IO>() {
86  close();
87 }
88 
89 template <typename IO>
91  if (this->closed_)
92  return;
93 
94  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
95  "unable to unregister shared memory region: " + this->name_);
96 
97  //unmap
98  int tmp_fd = munmap(this->addr_, this->size_);
99  if (tmp_fd == -1)
100  throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;
101 
102  //unlink
103  int shm_fd = shm_unlink(this->name_.c_str());
104  if (shm_fd == -1)
105  throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;
106 
107  this->closed_ = true;
108 }
109 
110 template <>
111 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset) {
112  if (size_ > 0)
113  std::memcpy(addr_ + offset, values, data_->byteSizePerBatch_);
114 }
115 
116 template <>
118  return addr_;
119 }
120 
125 
126 #ifdef TRITON_ENABLE_GPU
127 template <typename IO>
128 TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
129  : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
130  //todo: get server device id somehow?
131  cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
132  cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
133  cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
134  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
135  "unable to register CUDA shared memory region: " + this->name_);
136 }
137 
138 template <typename IO>
139 TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
140  close();
141 }
142 
143 template <typename IO>
144 void TritonGpuShmResource<IO>::close() {
145  if (this->closed_)
146  return;
147  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
148  "unable to unregister CUDA shared memory region: " + this->name_);
149  cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
150  this->closed_ = true;
151 }
152 
153 template <>
154 void TritonInputGpuShmResource::copyInput(const void* values, size_t offset) {
155  cudaCheck(
156  cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
157  data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) + " bytes to GPU");
158 }
159 
160 template <>
161 const uint8_t* TritonOutputGpuShmResource::copyOutput() {
162  //copy back from gpu, keep in scope
163  auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
164  cudaCheck(
165  cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
166  data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
167  data_->holder_ = ptr;
168  return ptr->data();
169 }
170 
171 template class TritonGpuShmResource<tc::InferInput>;
172 template class TritonGpuShmResource<tc::InferRequestedOutput>;
173 #endif
// ----------------------------------------------------------------------------
// Doxygen cross-reference residue from the HTML scrape (not part of the source):
//   TRITON_THROW_IF_ERROR(X, MSG) — defined in triton_utils.h:75
//   cudaCheck(ARG, ...) — defined in cudaCheck.h:69
//   Member declarations referenced above: TritonData<IO>* data_;
//   void copyInput(const void* values, size_t offset) override;
//   const uint8_t* copyOutput() override; virtual void set();
//   and the TritonMemResource / TritonHeapResource / TritonCpuShmResource
//   constructors taking (TritonData<IO>* data, const std::string& name, size_t size).
// ----------------------------------------------------------------------------