
TritonData.cc
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"

#include "model_config.pb.h"
#include "model_config.h"

#include <sstream>

namespace ni = nvidia::inferenceserver;
namespace tc = triton::client;

//dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
//fullShape: if batching is enabled, first entry is batch size; values can be modified
//shape: view into fullShape, excluding batch size entry
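//illustrative example (model shape and values assumed, not taken from a real config): for an input whose
//metadata shape is {-1, 10} with batching enabled, dims_ = {-1, 10}, shape_ views the trailing {10},
//and a later setBatchSize(5) sets fullShape_ = {5, 10}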
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TritonData<IO>::TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure unique name for shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)),
      totalByteSize_(0) {
  //create input or output object
  IO* iotmp;
  createObject(&iotmp);
  data_.reset(iotmp);
}

template <>
void TritonInputData::createObject(tc::InferInput** ioptr) {
  tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
}

template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
  //another specialization for output: can't use shared memory if output size is not known
  useShm_ &= !variableDims_;
}

template <>
std::string TritonInputData::xput() const {
  return "input";
}

template <>
std::string TritonOutputData::xput() const {
  return "output";
}

template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}

//setters
template <typename IO>
void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}

template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check boundary
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError")
        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}

template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}

template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}
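//illustrative example (values assumed): for shape_ = {10} with FP32 data (byteSize_ = 4) and batchSize_ = 5,
//computeSizes() gives sizeShape_ = 10, byteSizePerBatch_ = 40, and totalByteSize_ = 200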
template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}

//create a memory resource if none exists;
//otherwise, reuse the memory resource, resizing it if necessary
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //need to destroy before constructing new instance because shared memory key will be reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //need to destroy before constructing new instance because shared memory key will be reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //for remote/heap, size increases don't matter
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
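//illustrative example (sizes assumed): with shared memory on a local CPU server, a first updateMem(1024)
//creates the region, a later updateMem(512) reuses it, and updateMem(2048) closes and recreates it;
//for the remote/heap fallback the resource is created once and never replaced here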

//io accessors
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  //automatically creates a vector for each batch entry (if batch size known)
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}

template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_) {
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;
  }

  //shape must be specified for variable dims or if batch size changes
  data_->SetShape(fullShape_);

  //check type
  checkType<DT>();

  computeSizes();
  updateMem(totalByteSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }
  memResource_->set();

  //keep input data in scope
  holder_ = ptr;
  done_ = true;
}

//sets up shared memory for outputs, if possible
template <>
void TritonOutputData::prepare() {
  computeSizes();
  updateMem(totalByteSize_);
  memResource_->set();
}

template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  if (!result_) {
    throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
  }

  //check type
  checkType<DT>();

  const uint8_t* r0 = memResource_->copyOutput();
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    auto offset = i0 * sizeShape_;
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  done_ = true;
  return dataOut;
}

template <>
void TritonInputData::reset() {
  done_ = false;
  holder_.reset();
  data_->Reset();
  //reset shape
  if (variableDims_) {
    for (unsigned i = 0; i < shape_.size(); ++i) {
      unsigned locFull = fullLoc(i);
      fullShape_[locFull] = dims_[locFull];
    }
  }
  resetSizes();
}

template <>
void TritonOutputData::reset() {
  done_ = false;
  result_.reset();
  holder_.reset();
  resetSizes();
}

//explicit template instantiation declarations
template class TritonData<tc::InferInput>;
template class TritonData<tc::InferRequestedOutput>;

//(explicit instantiations of the allocate/toServer/fromServer member templates for each supported data type
//are not reproduced in this listing)
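
To show how these accessors fit together, here is a minimal usage sketch, not part of TritonData.cc: it assumes a caller that already holds references to a TritonInputData and a TritonOutputData (typically obtained from a TritonClient), a variable first tensor dimension, an already-set batch size, and float tensors; the function name fillAndRead and the zero fill values are hypothetical, and the actual inference request is dispatched by the client between toServer() and fromServer().

#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"

//hypothetical helper: fill one input before inference and read one output afterwards
void fillAndRead(TritonInputData& input1, TritonOutputData& output1, unsigned nFeatures) {
  //resize the (assumed variable) first dimension of the tensor for this event
  input1.setShape(0, nFeatures);
  //one std::vector<float> per batch entry
  auto data1 = input1.allocate<float>();
  for (auto& vec : *data1) {
    vec.assign(nFeatures, 0.f);  //placeholder values
  }
  //copy (or map via shared memory) the data to the server
  input1.toServer(data1);

  //... the TritonClient dispatches the inference request here ...

  //per-batch-entry views into the server result (no copy)
  const auto& result = output1.fromServer<float>();
  for (const auto& entry : result) {
    for (float val : entry) {
      (void)val;  //use the output values here
    }
  }
}

Note that toServer() stores the container in holder_ so the input data stays alive until reset(), and fromServer() returns lightweight views into the server result rather than copying it.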