// TritonData.cc: implementation of TritonData, the wrapper around Triton client input/output tensors
4 
5 #include "model_config.pb.h"
6 
7 #include <cstring>
8 #include <sstream>
9 
10 namespace ni = nvidia::inferenceserver;
11 namespace nic = ni::client;
12 
13 namespace nvidia {
14  namespace inferenceserver {
15  //in libgrpcclient.so, but corresponding header src/core/model_config.h not available
18  } // namespace inferenceserver
19 } // namespace nvidia
20 
21 //dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
22 //fullShape: if batching is enabled, first entry is batch size; values can be modified
23 //shape: view into fullShape, excluding batch size entry
24 template <typename IO>
26  : name_(name),
27  dims_(model_info.shape().begin(), model_info.shape().end()),
28  noBatch_(noBatch),
29  batchSize_(0),
30  fullShape_(dims_),
31  shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
32  variableDims_(anyNeg(shape_)),
33  productDims_(variableDims_ ? -1 : dimProduct(shape_)),
34  dname_(model_info.datatype()),
35  dtype_(ni::ProtocolStringToDataType(dname_)),
36  byteSize_(ni::GetDataTypeByteSize(dtype_)) {
37  //create input or output object
38  IO* iotmp;
39  createObject(&iotmp);
40  data_.reset(iotmp);
41 }
42 
43 template <>
44 void TritonInputData::createObject(nic::InferInput** ioptr) const {
45  nic::InferInput::Create(ioptr, name_, fullShape_, dname_);
46 }
47 
48 template <>
49 void TritonOutputData::createObject(nic::InferRequestedOutput** ioptr) const {
50  nic::InferRequestedOutput::Create(ioptr, name_);
51 }
52 
53 //setters
54 template <typename IO>
55 bool TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape, bool canThrow) {
56  bool result = true;
57  for (unsigned i = 0; i < newShape.size(); ++i) {
58  result &= setShape(i, newShape[i], canThrow);
59  }
60  return result;
61 }
62 
63 template <typename IO>
64 bool TritonData<IO>::setShape(unsigned loc, int64_t val, bool canThrow) {
65  std::stringstream msg;
66  unsigned full_loc = loc + (noBatch_ ? 0 : 1);
67 
68  //check boundary
69  if (full_loc >= fullShape_.size()) {
70  msg << name_ << " setShape(): dimension " << full_loc << " out of bounds (" << fullShape_.size() << ")";
71  if (canThrow)
72  throw cms::Exception("TritonDataError") << msg.str();
73  else {
74  edm::LogWarning("TritonDataWarning") << msg.str();
75  return false;
76  }
77  }
78 
79  if (val != fullShape_[full_loc]) {
80  if (dims_[full_loc] == -1) {
81  fullShape_[full_loc] = val;
82  return true;
83  } else {
84  msg << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
85  if (canThrow)
86  throw cms::Exception("TritonDataError") << msg.str();
87  else {
88  edm::LogWarning("TritonDataError") << msg.str();
89  return false;
90  }
91  }
92  }
93 
94  return true;
95 }
96 
97 template <typename IO>
98 void TritonData<IO>::setBatchSize(unsigned bsize) {
99  batchSize_ = bsize;
100  if (!noBatch_)
101  fullShape_[0] = batchSize_;
102 }
103 
104 //io accessors
105 template <>
106 template <typename DT>
107 void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr) {
108  const auto& data_in = *ptr;
109 
110  //check batch size
111  if (data_in.size() != batchSize_) {
112  throw cms::Exception("TritonDataError") << name_ << " input(): input vector has size " << data_in.size()
113  << " but specified batch size is " << batchSize_;
114  }
115 
116  //shape must be specified for variable dims or if batch size changes
117  data_->SetShape(fullShape_);
118 
119  if (byteSize_ != sizeof(DT))
120  throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << sizeof(DT)
121  << " (should be " << byteSize_ << " for " << dname_ << ")";
122 
123  int64_t nInput = sizeShape();
124  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
125  const DT* arr = data_in[i0].data();
126  triton_utils::throwIfError(data_->AppendRaw(reinterpret_cast<const uint8_t*>(arr), nInput * byteSize_),
127  name_ + " input(): unable to set data for batch entry " + std::to_string(i0));
128  }
129 
130  //keep input data in scope
131  holder_ = std::move(ptr);
132 }
133 
134 template <>
135 template <typename DT>
137  if (!result_) {
138  throw cms::Exception("TritonDataError") << name_ << " output(): missing result";
139  }
140 
141  if (byteSize_ != sizeof(DT)) {
142  throw cms::Exception("TritonDataError") << name_ << " output(): inconsistent byte size " << sizeof(DT)
143  << " (should be " << byteSize_ << " for " << dname_ << ")";
144  }
145 
146  uint64_t nOutput = sizeShape();
147  TritonOutput<DT> dataOut;
148  const uint8_t* r0;
149  size_t contentByteSize;
150  size_t expectedContentByteSize = nOutput * byteSize_ * batchSize_;
151  triton_utils::throwIfError(result_->RawData(name_, &r0, &contentByteSize), "output(): unable to get raw");
152  if (contentByteSize != expectedContentByteSize) {
153  throw cms::Exception("TritonDataError") << name_ << " output(): unexpected content byte size " << contentByteSize
154  << " (expected " << expectedContentByteSize << ")";
155  }
156 
157  const DT* r1 = reinterpret_cast<const DT*>(r0);
158  dataOut.reserve(batchSize_);
159  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
160  auto offset = i0 * nOutput;
161  dataOut.emplace_back(r1 + offset, r1 + offset + nOutput);
162  }
163 
164  return dataOut;
165 }
166 
167 template <>
169  data_->Reset();
170  holder_.reset();
171 }
172 
173 template <>
175  result_.reset();
176 }
177 
178 //explicit template instantiation declarations
179 template class TritonData<nic::InferInput>;
181 
182 template void TritonInputData::toServer(std::shared_ptr<TritonInput<float>> data_in);
183 template void TritonInputData::toServer(std::shared_ptr<TritonInput<int64_t>> data_in);
184 
nvidia::inferenceserver
Definition: TritonData.cc:14
mps_fire.i
i
Definition: mps_fire.py:428
TritonData.h
TritonData
Definition: TritonData.h:29
MessageLogger.h
TritonData::result_
std::shared_ptr< Result > result_
Definition: TritonData.h:95
TritonData::holder_
std::any holder_
Definition: TritonData.h:94
TritonData::reset
void reset()
Definition: TritonData.cc:168
TritonData::TensorMetadata
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:32
TritonData::sizeShape
int64_t sizeShape() const
Definition: TritonData.h:59
TritonData::toServer
void toServer(std::shared_ptr< TritonInput< DT >> ptr)
Definition: TritonData.cc:107
TritonData::batchSize_
unsigned batchSize_
Definition: TritonData.h:86
mps_check.msg
tuple msg
Definition: mps_check.py:285
TritonInput
std::vector< std::vector< DT > > TritonInput
Definition: TritonData.h:23
nvidia
Definition: TritonData.cc:13
TritonData::ShapeType
std::vector< int64_t > ShapeType
Definition: TritonData.h:33
edm::LogWarning
Log< level::Warning, false > LogWarning
Definition: MessageLogger.h:122
TritonData::fullShape_
ShapeType fullShape_
Definition: TritonData.h:87
TritonData::fromServer
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:136
TritonData::createObject
void createObject(IO **ioptr) const
triton_utils::throwIfError
void throwIfError(const Error &err, std::string_view msg)
Definition: triton_utils.cc:20
TritonData::name_
std::string name_
Definition: TritonData.h:82
HLTObjectMonitor_Client_cff.client
client
Definition: HLTObjectMonitor_Client_cff.py:6
TritonData::setShape
bool setShape(const ShapeType &newShape)
Definition: TritonData.h:40
mps_fire.end
end
Definition: mps_fire.py:242
TritonData::dname_
std::string dname_
Definition: TritonData.h:91
TritonData::data_
std::shared_ptr< IO > data_
Definition: TritonData.h:83
AlCaHLTBitMon_QueryRunRegistry.string
string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
GeomDetEnumerators::DT
Definition: GeomDetEnumerators.h:18
TritonOutput
std::vector< edm::Span< const DT * > > TritonOutput
Definition: TritonData.h:25
TritonData::setBatchSize
void setBatchSize(unsigned bsize)
Definition: TritonData.cc:98
runTauDisplay.dtype
dtype
Definition: runTauDisplay.py:121
heppy_batch.val
val
Definition: heppy_batch.py:351
eostools.move
def move(src, dest)
Definition: eostools.py:511
triton_utils.h
diffTwoXMLs.r1
r1
Definition: diffTwoXMLs.py:53
Exception
Definition: hltDiff.cc:245
Skims_PA_cff.name
name
Definition: Skims_PA_cff.py:17
nvidia::inferenceserver::ProtocolStringToDataType
inference::DataType ProtocolStringToDataType(const std::string &dtype)
cond::uint64_t
unsigned long long uint64_t
Definition: Time.h:13
TritonData::byteSize_
int64_t byteSize_
Definition: TritonData.h:93
mps_fire.result
result
Definition: mps_fire.py:311
nvidia::inferenceserver::GetDataTypeByteSize
size_t GetDataTypeByteSize(const inference::DataType dtype)
hltrates_dqm_sourceclient-live_cfg.offset
offset
Definition: hltrates_dqm_sourceclient-live_cfg.py:82
TritonData::TritonData
TritonData(const std::string &name, const TensorMetadata &model_info, bool noBatch)
Definition: TritonData.cc:25
HLT_FULL_cff.DataType
DataType
Definition: HLT_FULL_cff.py:32765