CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TritonData.cc
Go to the documentation of this file.
#include "model_config.pb.h"
#include "model_config.h"

#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
11 
12 namespace ni = nvidia::inferenceserver;
13 namespace tc = triton::client;
14 
//dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
//fullShape: if batching is enabled, the first entry is the batch size; values can be modified
//shape: view into fullShape, excluding the batch-size entry
18 template <typename IO>
20  const TritonData<IO>::TensorMetadata& model_info,
22  const std::string& pid)
23  : name_(name),
24  client_(client),
25  useShm_(client_->useSharedMemory()),
26  //ensure unique name for shared memory region
27  shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
28  dims_(model_info.shape().begin(), model_info.shape().end()),
29  noBatch_(client_->noBatch()),
30  batchSize_(0),
31  fullShape_(dims_),
32  shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
33  variableDims_(anyNeg(shape_)),
34  productDims_(variableDims_ ? -1 : dimProduct(shape_)),
35  dname_(model_info.datatype()),
36  dtype_(ni::ProtocolStringToDataType(dname_)),
37  byteSize_(ni::GetDataTypeByteSize(dtype_)),
38  totalByteSize_(0) {
39  //create input or output object
40  IO* iotmp;
41  createObject(&iotmp);
42  data_.reset(iotmp);
43 }
44 
45 template <>
46 void TritonInputData::createObject(tc::InferInput** ioptr) {
47  tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
48 }
49 
50 template <>
51 void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
52  tc::InferRequestedOutput::Create(ioptr, name_);
53  //another specialization for output: can't use shared memory if output size is not known
55 }
56 
57 template <>
59  return "input";
60 }
61 
62 template <>
64  return "output";
65 }
66 
67 template <typename IO>
68 tc::InferenceServerGrpcClient* TritonData<IO>::client() {
69  return client_->client();
70 }
71 
72 //setters
73 template <typename IO>
75  for (unsigned i = 0; i < newShape.size(); ++i) {
76  setShape(i, newShape[i]);
77  }
78 }
79 
80 template <typename IO>
81 void TritonData<IO>::setShape(unsigned loc, int64_t val) {
82  unsigned locFull = fullLoc(loc);
83 
84  //check boundary
85  if (locFull >= fullShape_.size())
86  throw cms::Exception("TritonDataError")
87  << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";
88 
89  if (val != fullShape_[locFull]) {
90  if (dims_[locFull] == -1)
91  fullShape_[locFull] = val;
92  else
93  throw cms::Exception("TritonDataError")
94  << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
95  }
96 }
97 
98 template <typename IO>
99 void TritonData<IO>::setBatchSize(unsigned bsize) {
100  batchSize_ = bsize;
101  if (!noBatch_)
102  fullShape_[0] = batchSize_;
103 }
104 
105 template <typename IO>
107  sizeShape_ = sizeShape();
108  byteSizePerBatch_ = byteSize_ * sizeShape_;
109  totalByteSize_ = byteSizePerBatch_ * batchSize_;
110 }
111 template <typename IO>
113  sizeShape_ = 0;
114  byteSizePerBatch_ = 0;
115  totalByteSize_ = 0;
116 }
117 
118 //create a memory resource if none exists;
119 //otherwise, reuse the memory resource, resizing it if necessary
120 template <typename IO>
122  if (!memResource_ or size > memResource_->size()) {
123  if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
124  //avoid unnecessarily throwing in destructor
125  if (memResource_)
126  memResource_->close();
127  //need to destroy before constructing new instance because shared memory key will be reused
128  memResource_.reset();
129  memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
130  }
131 #ifdef TRITON_ENABLE_GPU
132  else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
133  //avoid unnecessarily throwing in destructor
134  if (memResource_)
135  memResource_->close();
136  //need to destroy before constructing new instance because shared memory key will be reused
137  memResource_.reset();
138  memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
139  }
140 #endif
141  //for remote/heap, size increases don't matter
142  else if (!memResource_)
143  memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
144  }
145 }
146 
147 //io accessors
148 template <>
149 template <typename DT>
151  //automatically creates a vector for each batch entry (if batch size known)
152  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
153  if (reserve and !anyNeg(shape_)) {
154  computeSizes();
155  for (auto& vec : *ptr) {
156  vec.reserve(sizeShape_);
157  }
158  }
159  return ptr;
160 }
161 
162 template <>
163 template <typename DT>
165  //shouldn't be called twice
166  if (done_)
167  throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";
168 
169  const auto& data_in = *ptr;
170 
171  //check batch size
172  if (data_in.size() != batchSize_) {
173  throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
174  << " but specified batch size is " << batchSize_;
175  }
176 
177  //shape must be specified for variable dims or if batch size changes
178  data_->SetShape(fullShape_);
179 
180  if (byteSize_ != sizeof(DT))
181  throw cms::Exception("TritonDataError") << name_ << " toServer(): inconsistent byte size " << sizeof(DT)
182  << " (should be " << byteSize_ << " for " << dname_ << ")";
183 
184  computeSizes();
186  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
187  memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
188  }
189  memResource_->set();
190 
191  //keep input data in scope
192  holder_ = ptr;
193  done_ = true;
194 }
195 
196 //sets up shared memory for outputs, if possible
197 template <>
199  computeSizes();
201  memResource_->set();
202 }
203 
204 template <>
205 template <typename DT>
207  //shouldn't be called twice
208  if (done_)
209  throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";
210 
211  if (!result_) {
212  throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
213  }
214 
215  if (byteSize_ != sizeof(DT)) {
216  throw cms::Exception("TritonDataError") << name_ << " fromServer(): inconsistent byte size " << sizeof(DT)
217  << " (should be " << byteSize_ << " for " << dname_ << ")";
218  }
219 
220  const uint8_t* r0 = memResource_->copyOutput();
221  const DT* r1 = reinterpret_cast<const DT*>(r0);
222 
223  TritonOutput<DT> dataOut;
224  dataOut.reserve(batchSize_);
225  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
226  auto offset = i0 * sizeShape_;
227  dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
228  }
229 
230  done_ = true;
231  return dataOut;
232 }
233 
234 template <>
236  done_ = false;
237  holder_.reset();
238  data_->Reset();
239  //reset shape
240  if (variableDims_) {
241  for (unsigned i = 0; i < shape_.size(); ++i) {
242  unsigned locFull = fullLoc(i);
243  fullShape_[locFull] = dims_[locFull];
244  }
245  }
246  resetSizes();
247 }
248 
249 template <>
251  done_ = false;
252  result_.reset();
253  holder_.reset();
254  resetSizes();
255 }
256 
257 //explicit template instantiation declarations
258 template class TritonData<tc::InferInput>;
260 
263 
266 
std::shared_ptr< IO > data_
Definition: TritonData.h:117
void setBatchSize(unsigned bsize)
Definition: TritonData.cc:99
std::shared_ptr< TritonInput< DT >> TritonInputContainer
Definition: TritonData.h:39
The signals that services can subscribe to. This is based on ActivityRegistry and is current per Service. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has a defined signature, listed with the corresponding AR_WATCH_USING_METHOD_ macro used to subscribe to it.
Definition: Activities.doc:12
std::shared_ptr< void > holder_
Definition: TritonData.h:137
bool done_
Definition: TritonData.h:141
size_t totalByteSize_
Definition: TritonData.h:133
TritonInputContainer< DT > allocate(bool reserve=true)
Definition: TritonData.cc:150
size_t byteSizePerBatch_
Definition: TritonData.h:132
unsigned fullLoc(unsigned loc) const
Definition: TritonData.h:90
void computeSizes()
Definition: TritonData.cc:106
const ShapeType dims_
Definition: TritonData.h:121
std::string xput() const
Definition: TritonData.cc:58
bool useShm_
Definition: TritonData.h:119
ShapeView shape_
Definition: TritonData.h:125
void updateMem(size_t size)
Definition: TritonData.cc:121
bool anyNeg(const ShapeView &vec) const
Definition: TritonData.h:101
int64_t byteSize_
Definition: TritonData.h:130
size_t sizeShape_
Definition: TritonData.h:131
std::shared_ptr< Result > result_
Definition: TritonData.h:139
void reset()
Definition: TritonData.cc:235
IO * data()
Definition: TritonData.h:94
void prepare()
Definition: TritonData.cc:198
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
Definition: TritonData.cc:19
void setShape(const ShapeType &newShape)
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:48
void createObject(IO **ioptr)
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:206
triton::client::InferenceServerGrpcClient * client()
Definition: TritonData.cc:68
void resetSizes()
Definition: TritonData.cc:112
std::vector< edm::Span< const DT * >> TritonOutput
Definition: TritonData.h:35
std::shared_ptr< TritonMemResource< IO > > memResource_
Definition: TritonData.h:138
std::string dname_
Definition: TritonData.h:128
string end
Definition: dataset.py:937
ShapeType fullShape_
Definition: TritonData.h:124
std::string name_
Definition: TritonData.h:116
std::vector< int64_t > ShapeType
Definition: TritonData.h:49
unsigned batchSize_
Definition: TritonData.h:123
bool variableDims_
Definition: TritonData.h:126
void toServer(TritonInputContainer< DT > ptr)
Definition: TritonData.cc:164
auto size() const
Definition: Span.h:24
tuple size
Write out results.