CMS 3D CMS Logo

TritonData.cc
Go to the documentation of this file.
5 
6 #include "model_config.pb.h"
7 #include "triton/common/model_config.h"
8 
9 #include <sstream>
10 
11 namespace tco = triton::common;
12 namespace tc = triton::client;
13 
14 //dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
15 //fullShape: if batching is enabled, first entry is batch size; values can be modified
16 //shape: view into fullShape, excluding batch size entry
17 template <typename IO>
19  const TritonData<IO>::TensorMetadata& model_info,
21  const std::string& pid)
22  : name_(name),
23  client_(client),
24  useShm_(client_->useSharedMemory()),
25  //ensure unique name for shared memory region
26  shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
27  dims_(model_info.shape().begin(), model_info.shape().end()),
28  dname_(model_info.datatype()),
29  dtype_(tco::ProtocolStringToDataType(dname_)),
30  byteSize_(tco::GetDataTypeByteSize(dtype_)),
31  totalByteSize_(0) {
32  //initialize first shape entry
33  addEntryImpl(0);
34  //one-time computation of some shape info
35  variableDims_ = anyNeg(entries_.front().shape_);
36  productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_);
37  checkShm();
38 }
39 
40 template <>
42  //another specialization for output: can't use shared memory if output size is not known
44 }
45 
46 template <typename IO>
48  //ensures consistency among all inputs
49  client_->addEntry(entry);
50 }
51 
52 template <typename IO>
54  if (entry >= entries_.size()) {
55  entries_.reserve(entry + 1);
56  for (unsigned i = entries_.size(); i < entry + 1; ++i) {
57  entries_.emplace_back(dims_, client_->noOuterDim(), name_, dname_);
58  }
59  }
60 }
61 
62 template <>
63 void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr,
64  const std::string& name,
65  const std::string& dname) {
66  tc::InferInput::Create(ioptr, name, fullShape_, dname);
67 }
68 
69 template <>
70 void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr,
71  const std::string& name,
72  const std::string& dname) {
73  tc::InferRequestedOutput::Create(ioptr, name);
74 }
75 
76 template <>
78  return "input";
79 }
80 
81 template <>
83  return "output";
84 }
85 
86 template <typename IO>
87 tc::InferenceServerGrpcClient* TritonData<IO>::client() {
88  return client_->client();
89 }
90 
91 //setters
92 template <typename IO>
93 void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape, unsigned entry) {
94  addEntry(entry);
95  for (unsigned i = 0; i < newShape.size(); ++i) {
96  setShape(i, newShape[i], entry);
97  }
98 }
99 
100 template <typename IO>
101 void TritonData<IO>::setShape(unsigned loc, int64_t val, unsigned entry) {
102  addEntry(entry);
103 
104  unsigned locFull = fullLoc(loc);
105 
106  //check boundary
107  if (locFull >= entries_[entry].fullShape_.size())
108  throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
109  << entries_[entry].fullShape_.size() << ")";
110 
111  if (val != entries_[entry].fullShape_[locFull]) {
112  if (dims_[locFull] == -1)
113  entries_[entry].fullShape_[locFull] = val;
114  else
115  throw cms::Exception("TritonDataError")
116  << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
117  }
118 }
119 
120 template <typename IO>
121 void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize) {
122  sizeShape_ = shapeSize;
123  byteSizePerBatch_ = byteSize * sizeShape_;
124  totalByteSize_ = byteSizePerBatch_ * batchSize;
125 }
126 
127 template <typename IO>
129  totalByteSize_ = 0;
130  unsigned outerDim = client_->outerDim();
131  for (unsigned i = 0; i < entries_.size(); ++i) {
132  entries_[i].computeSizes(sizeShape(i), byteSize_, outerDim);
133  entries_[i].offset_ = totalByteSize_;
134  totalByteSize_ += entries_[i].totalByteSize_;
135  }
136 }
137 
138 //create a memory resource if none exists;
139 //otherwise, reuse the memory resource, resizing it if necessary
140 template <typename IO>
142  if (!memResource_ or size > memResource_->size()) {
144  //avoid unnecessarily throwing in destructor
145  if (memResource_)
146  memResource_->close();
147  //need to destroy before constructing new instance because shared memory key will be reused
148  memResource_.reset();
149  memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
150  }
151 #ifdef TRITON_ENABLE_GPU
153  //avoid unnecessarily throwing in destructor
154  if (memResource_)
155  memResource_->close();
156  //need to destroy before constructing new instance because shared memory key will be reused
157  memResource_.reset();
158  memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
159  }
160 #endif
161  //for remote/heap, size increases don't matter
162  else if (!memResource_)
163  memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
164  }
165 }
166 
167 //io accessors
168 template <>
169 template <typename DT>
171  //automatically creates a vector for each item (if batch size known)
172  auto ptr = std::make_shared<TritonInput<DT>>(client_->batchSize());
173  if (reserve) {
174  computeSizes();
175  for (auto& entry : entries_) {
176  if (anyNeg(entry.shape_))
177  continue;
178  for (auto& vec : *ptr) {
179  vec.reserve(entry.sizeShape_);
180  }
181  }
182  }
183  return ptr;
184 }
185 
186 template <>
187 template <typename DT>
189  //shouldn't be called twice
190  if (done_)
191  throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";
192 
193  const auto& data_in = *ptr;
194 
195  //check batch size
196  unsigned batchSize = client_->batchSize();
197  unsigned outerDim = client_->outerDim();
198  if (data_in.size() != batchSize) {
199  throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
200  << " but specified batch size is " << batchSize;
201  }
202 
203  //check type
204  checkType<DT>();
205 
206  computeSizes();
208 
209  unsigned offset = 0;
210  unsigned counter = 0;
211  for (unsigned i = 0; i < entries_.size(); ++i) {
212  auto& entry = entries_[i];
213 
214  //shape must be specified for variable dims or if batch size changes
215  if (!client_->noOuterDim())
216  entry.fullShape_[0] = outerDim;
217  entry.data_->SetShape(entry.fullShape_);
218 
219  for (unsigned i0 = 0; i0 < outerDim; ++i0) {
220  //avoid copying empty input
221  if (entry.byteSizePerBatch_ > 0)
222  memResource_->copyInput(data_in[counter].data(), offset, i);
223  offset += entry.byteSizePerBatch_;
224  ++counter;
225  }
226  }
227  memResource_->set();
228 
229  //keep input data in scope
230  holder_ = ptr;
231  done_ = true;
232 }
233 
234 //sets up shared memory for outputs, if possible
235 template <>
237  computeSizes();
239  memResource_->set();
240 }
241 
242 template <>
243 template <typename DT>
245  //shouldn't be called twice
246  if (done_)
247  throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";
248 
249  //check type
250  checkType<DT>();
251 
252  memResource_->copyOutput();
253 
254  unsigned outerDim = client_->outerDim();
255  TritonOutput<DT> dataOut;
256  dataOut.reserve(client_->batchSize());
257  for (unsigned i = 0; i < entries_.size(); ++i) {
258  const auto& entry = entries_[i];
259  const DT* r1 = reinterpret_cast<const DT*>(entry.output_);
260 
261  if (entry.totalByteSize_ > 0 and !entry.result_) {
262  throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
263  }
264 
265  for (unsigned i0 = 0; i0 < outerDim; ++i0) {
266  auto offset = i0 * entry.sizeShape_;
267  dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_);
268  }
269  }
270 
271  done_ = true;
272  return dataOut;
273 }
274 
275 template <typename IO>
277  done_ = false;
278  holder_.reset();
279  entries_.clear();
280  totalByteSize_ = 0;
281  //re-initialize first shape entry
282  addEntryImpl(0);
283 }
284 
285 template <typename IO>
286 unsigned TritonData<IO>::fullLoc(unsigned loc) const {
287  return loc + (client_->noOuterDim() ? 0 : 1);
288 }
289 
//explicit template instantiation definitions
291 template class TritonData<tc::InferInput>;
293 
305 
317 
std::vector< TritonDataEntry > entries_
Definition: TritonData.h:188
size
Write out results.
bool noOuterDim() const
Definition: TritonClient.h:58
int64_t sizeShape(unsigned entry=0) const
Definition: TritonData.h:79
void addEntryImpl(unsigned entry)
Definition: TritonData.cc:53
std::shared_ptr< void > holder_
Definition: TritonData.h:193
bool done_
Definition: TritonData.h:196
size_t totalByteSize_
Definition: TritonData.h:189
unsigned fullLoc(unsigned loc) const
Definition: TritonData.cc:286
void createObject(IO **ioptr, const std::string &name, const std::string &dname)
int64_t byteSize() const
Definition: TritonData.h:72
void computeSizes()
Definition: TritonData.cc:128
const ShapeType dims_
Definition: TritonData.h:182
bool useShm_
Definition: TritonData.h:180
const std::string & dname() const
Definition: TritonData.h:73
void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize)
Definition: TritonData.cc:121
void updateMem(size_t size)
Definition: TritonData.cc:141
void reset()
Definition: TritonData.cc:276
static std::string to_string(const XMLCh *ch)
void checkShm()
Definition: TritonData.h:144
IO * data(unsigned entry=0)
Definition: TritonData.h:148
int64_t productDims_
Definition: TritonData.h:184
int64_t byteSize_
Definition: TritonData.h:187
TritonInputContainer< DT > allocate(bool reserve=true)
Definition: TritonData.cc:170
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::vector< edm::Span< const DT * > > TritonOutput
Definition: TritonData.h:37
TritonServerType serverType() const
Definition: TritonClient.h:50
auto client()
Definition: TritonClient.h:95
void setShape(const ShapeType &newShape, unsigned entry=0)
TritonClient * client_
Definition: TritonData.h:179
bool anyNeg(const ShapeView &vec) const
Definition: TritonData.h:160
void prepare()
Definition: TritonData.cc:236
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
Definition: TritonData.cc:18
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:50
std::shared_ptr< TritonInput< DT > > TritonInputContainer
Definition: TritonData.h:41
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:244
triton::client::InferenceServerGrpcClient * client()
Definition: TritonData.cc:87
unsigned batchSize() const
unsigned outerDim() const
Definition: TritonClient.h:59
std::shared_ptr< TritonMemResource< IO > > memResource_
Definition: TritonData.h:194
int64_t dimProduct(const ShapeView &vec) const
Definition: TritonData.h:163
std::string xput() const
Definition: TritonData.cc:77
static std::atomic< unsigned int > counter
std::string name_
Definition: TritonData.h:178
std::vector< int64_t > ShapeType
Definition: TritonData.h:51
std::string shmName_
Definition: TritonData.h:181
bool variableDims_
Definition: TritonData.h:183
void toServer(TritonInputContainer< DT > ptr)
Definition: TritonData.cc:188
void addEntry(unsigned entry)
Definition: TritonData.cc:47