#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "FWCore/Utilities/interface/Exception.h"

#include "model_config.pb.h"
#include "model_config.h"

namespace ni = nvidia::inferenceserver;
namespace tc = triton::client;
//dims_: fixed tensor dimensions from the model configuration (-1 marks a variable dimension)
//fullShape_: includes the leading batch-size entry when batching is enabled; values can be modified
//shape_: view into fullShape_ that excludes the batch-size entry
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure a unique name for the shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)) {
  //create the underlying Triton input/output object (body reconstructed; elided in the original listing)
  IO* ioptr = nullptr;
  createObject(&ioptr);
  data_.reset(ioptr);
}

//(the corresponding InferInput specialization was elided in the original listing)
template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
}
template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}
//set all dimensions at once
template <typename IO>
void TritonData<IO>::setShape(const ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}
//set a single dimension, checking bounds and whether it is allowed to vary
template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check boundary
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
                                            << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}
//cache the sizes derived from the current shape and batch size
template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}
template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}
//create the memory resource if none exists; otherwise reuse it, reallocating only if the requested size grew
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    //branch conditions below are reconstructed; they were elided in the original listing
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in the destructor
      if (memResource_)
        memResource_->close();
      //destroy before constructing the new instance because the shared memory key is reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      //avoid unnecessarily throwing in the destructor
      if (memResource_)
        memResource_->close();
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //for remote servers, or when shared memory is disabled, size increases don't matter: use a heap resource
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
//create the input container: one vector per batch entry, optionally reserving the expected size
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}
//send the input data to the server (via gRPC or shared memory)
template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //should not be called twice for the same event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_)
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;

  //shape update, type check, and memory-resource setup were elided in the original listing
  computeSizes();
  updateMem(totalByteSize_);

  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    //copy each batch entry at its offset in the memory resource (loop body reconstructed)
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }

  //keep the input data in scope until the request completes
  holder_ = ptr;
  done_ = true;
}
//retrieve the output data from the server as non-owning views over the result buffer
template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //should not be called twice for the same event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  //result and type checks were elided in the original listing
  const uint8_t* r0 = memResource_->copyOutput();  //raw output buffer (retrieval reconstructed)
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    const auto offset = i0 * sizeShape_;
    //each batch entry is an edm::Span view into the buffer, avoiding a copy
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  //(remaining bookkeeping elided in the original listing)
  return dataOut;
}
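//Illustrative usage sketch (not part of the original file): the typical allocate()/toServer()/
//fromServer() round trip as seen from a SonicTriton producer, which receives the input map in
//acquire() and the output map in produce(). The tensor names "input_1" and "output_1", the float
//data type, and the single-entry batch are hypothetical assumptions; the batch size is set on the
//TritonClient beforehand and the inference request is dispatched by the Sonic framework in between.
void fillExampleInput(TritonInputMap& inputs, const std::vector<float>& features) {
  auto& input = inputs.at("input_1");      //hypothetical tensor name
  input.setShape(0, features.size());      //resize the variable dimension for this event
  auto data = input.allocate<float>();     //one inner vector per batch entry
  (*data)[0].assign(features.begin(), features.end());
  input.toServer(data);                    //copies to heap or shared memory and keeps the data alive
}

void readExampleOutput(const TritonOutputMap& outputs) {
  const auto& output = outputs.at("output_1");       //hypothetical tensor name
  const auto& result = output.fromServer<float>();   //one non-owning edm::Span per batch entry
  for (float value : result[0]) {
    //use the output values ...
    (void)value;
  }
}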