
TritonData.cc
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"

#include "model_config.pb.h"
#include "model_config.h"

#include <sstream>

namespace ni = nvidia::inferenceserver;
namespace tc = triton::client;

//dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
//fullShape: if batching is enabled, first entry is batch size; values can be modified
//shape: view into fullShape, excluding batch size entry
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TritonData<IO>::TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure unique name for shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)),
      totalByteSize_(0) {
  //create input or output object
  IO* iotmp;
  createObject(&iotmp);
  data_.reset(iotmp);
}
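
//illustrative sketch (assumption, not from this file): for a hypothetical model whose
//config.pbtxt declares dims: [-1, 10] with batching enabled, the server metadata shape
//gains a leading batch dimension, so the members above become
//  dims_      = {-1, -1, 10}   (fixed copy of the metadata shape)
//  fullShape_ = {-1, -1, 10}   (entry 0 is later overwritten by setBatchSize())
//  shape_     = {-1, 10}       (view of fullShape_ excluding the batch entry)
//variableDims_ is then true, and the remaining -1 must be set per event via setShape().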

template <>
void TritonInputData::createObject(tc::InferInput** ioptr) {
  tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
}

template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
  //another specialization for output: can't use shared memory if output size is not known
  useShm_ &= !variableDims_;
}

template <>
std::string TritonInputData::xput() const {
  return "input";
}

template <>
std::string TritonOutputData::xput() const {
  return "output";
}

template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}

//setters
template <typename IO>
void TritonData<IO>::setShape(const ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}

template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check boundary
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError")
        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}

template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}

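//usage sketch (hedged: assumes a module holding a TritonClient* named client and an
//input tensor named "INPUT0" with one variable dimension; both names are hypothetical):
//  auto& input = client->input().at("INPUT0");
//  input.setBatchSize(nEvents);   //stored in batchSize_ and, unless noBatch_, fullShape_[0]
//  input.setShape(0, nFeatures);  //allowed only where the config dimension is -1
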
template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}

template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}

//create a memory resource if none exists;
//otherwise, reuse the memory resource, resizing it if necessary
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //need to destroy before constructing new instance because shared memory key will be reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //need to destroy before constructing new instance because shared memory key will be reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //for remote/heap, size increases don't matter
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
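
//behavior sketch (assumed scenario for illustration): with shared memory enabled and a
//local CPU server, successive calls reuse or grow the region:
//  updateMem(1024);  //no resource yet -> create a TritonCpuShmResource of size 1024
//  updateMem(512);   //smaller request -> existing resource is reused unchanged
//  updateMem(4096);  //larger request -> old region closed, new one created (same shm key)
//for a remote server, a TritonHeapResource is created once; later size increases are no-ops here.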

//io accessors
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  //automatically creates a vector for each batch entry (if batch size known)
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}

template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_) {
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;
  }

  //shape must be specified for variable dims or if batch size changes
  data_->SetShape(fullShape_);

  if (byteSize_ != sizeof(DT))
    throw cms::Exception("TritonDataError") << name_ << " toServer(): inconsistent byte size " << sizeof(DT)
                                            << " (should be " << byteSize_ << " for " << dname_ << ")";

  computeSizes();
  updateMem(totalByteSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }
  memResource_->set();

  //keep input data in scope
  holder_ = ptr;
  done_ = true;
}
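
//end-to-end input sketch (hedged: "input" refers to a TritonInputData reference as in the
//setters example above; sizes and types are hypothetical):
//  auto data = input.allocate<float>();  //one vector per batch entry
//  for (auto& vec : *data)
//    vec.assign(nFeatures, 0.f);         //each entry must match the declared tensor shape
//  input.toServer(data);                 //validates sizes, then copies via memResource_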

//sets up shared memory for outputs, if possible
template <>
void TritonOutputData::prepare() {
  computeSizes();
  updateMem(totalByteSize_);
  memResource_->set();
}

template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  if (!result_) {
    throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
  }

  if (byteSize_ != sizeof(DT)) {
    throw cms::Exception("TritonDataError") << name_ << " fromServer(): inconsistent byte size " << sizeof(DT)
                                            << " (should be " << byteSize_ << " for " << dname_ << ")";
  }

  const uint8_t* r0 = memResource_->copyOutput();
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    auto offset = i0 * sizeShape_;
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  done_ = true;
  return dataOut;
}
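
//output retrieval sketch (hedged; client accessor and tensor name are hypothetical):
//  const auto& output = client->output().at("OUTPUT0");
//  TritonOutput<float> out = output.fromServer<float>();  //one edm::Span per batch entry
//  for (const auto& span : out)
//    for (float v : span) { /*use v*/ }  //zero-copy view into the server result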

template <>
void TritonInputData::reset() {
  done_ = false;
  holder_.reset();
  data_->Reset();
  //reset shape
  if (variableDims_) {
    for (unsigned i = 0; i < shape_.size(); ++i) {
      unsigned locFull = fullLoc(i);
      fullShape_[locFull] = dims_[locFull];
    }
  }
  resetSizes();
}

template <>
void TritonOutputData::reset() {
  done_ = false;
  result_.reset();
  holder_.reset();
  resetSizes();
}
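
//lifecycle note (inference from the done_ checks above): toServer() and fromServer() may
//each run at most once per event; reset() is expected between events to clear done_,
//release held data, and restore any variable input dimensions to their -1 defaults.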

//explicit template instantiation declarations
template class TritonData<tc::InferInput>;
template class TritonData<tc::InferRequestedOutput>;