6 #include "model_config.pb.h" 7 #include "model_config.h" 11 namespace ni = nvidia::inferenceserver;
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TritonData<IO>::TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)) {
  //create the underlying Triton input or output object
  IO* iotmp;
  createObject(&iotmp);
  data_.reset(iotmp);
}

//outputs are requested from the server by name
template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
}
template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}
//set all dimensions at once
template <typename IO>
void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}
template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check bounds
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError")
        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    //only variable (-1) dimensions may be changed
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}
template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}
template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}
//create or enlarge the memory resource used to exchange data with the server
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in the destructor
      if (memResource_)
        memResource_->close();
      //destroy before constructing the new instance because the shared memory key is reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      if (memResource_)
        memResource_->close();
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //otherwise use ordinary heap memory; it only needs to be created once
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);  //one vector per batch entry
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}
template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //should not be called twice in the same event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_) {
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;
  }

  //the shape must be communicated to the server (it may contain variable dimensions)
  data_->SetShape(fullShape_);

  computeSizes();
  updateMem(totalByteSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    //copy each batch entry into the memory resource at its byte offset
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }

  //keep the input data in scope until the request completes
  holder_ = ptr;
  done_ = true;
}
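For orientation, here is a minimal usage sketch of the input path above, as it might be written in a client module. The tensor name "INPUT0", the dimension nfeat, and the client_->input() accessor are illustrative assumptions, not taken from this file.

//sketch only: "INPUT0", nfeat, and client_->input() are assumed names
auto& input1 = client_->input().at("INPUT0");  //TritonInputData for one named tensor
input1.setShape(0, nfeat);                     //only variable (-1) dimensions can be changed
auto data1 = input1.allocate<float>();         //TritonInputContainer<float>: one vector per batch entry
for (auto& vec : *data1) {
  vec.assign(nfeat, 0.f);                      //fill the flattened tensor for this batch entry
}
input1.toServer(data1);                        //copy into the memory resource; data1 is retained via holder_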
template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //should not be called twice in the same event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  const uint8_t* r0 = memResource_->copyOutput();
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  //one non-owning span per batch entry, viewing the server result
  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    auto offset = i0 * sizeShape_;
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  done_ = true;
  return dataOut;
}
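Correspondingly, a hedged sketch of the output path, run after the server response is available; the tensor name "OUTPUT0" and the client_->output() accessor are again assumptions for illustration.

//sketch only: "OUTPUT0" and client_->output() are assumed names
const auto& output1 = client_->output().at("OUTPUT0");
const auto& tmp = output1.fromServer<float>();  //TritonOutput<float>: one edm::Span per batch entry
float sum = 0.f;
for (unsigned i0 = 0; i0 < tmp.size(); ++i0) {
  for (float val : tmp[i0]) {
    sum += val;  //consume the flattened output tensor for batch entry i0
  }
}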
std::shared_ptr< IO > data_
void setBatchSize(unsigned bsize)
std::shared_ptr< void > holder_
unsigned fullLoc(unsigned loc) const
std::string to_string(const V &value)
void updateMem(size_t size)
TritonInputContainer< DT > allocate(bool reserve=true)
std::shared_ptr< Result > result_
std::vector< edm::Span< const DT * > > TritonOutput
bool anyNeg(const ShapeView &vec) const
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
void setShape(const ShapeType &newShape)
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
void createObject(IO **ioptr)
std::shared_ptr< TritonInput< DT > > TritonInputContainer
TritonOutput< DT > fromServer() const
triton::client::InferenceServerGrpcClient * client()
std::shared_ptr< TritonMemResource< IO > > memResource_
std::vector< int64_t > ShapeType
void toServer(TritonInputContainer< DT > ptr)