#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "model_config.pb.h"
#include "model_config.h"

namespace ni = nvidia::inferenceserver;
namespace tc = triton::client;
//dims_: constant, taken from the model metadata
//fullShape_: mutable copy; if batching is enabled, entry 0 holds the batch size
//shape_: view of fullShape_ that excludes the batch entry
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TritonData<IO>::TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure a unique name for the shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)) {
  //create the underlying Triton input/output object
  IO* iotmp;
  createObject(&iotmp);
  data_.reset(iotmp);
}

//output specialization: just request the named output from the server
template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
}
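//Worked illustration (hypothetical values, not from this file): for a model whose metadata
//reports shape {-1, -1, 10} with batching enabled (noBatch_ == false):
//  dims_      = {-1, -1, 10}   fixed copy of the model metadata
//  fullShape_ = {-1, -1, 10}   mutable; entry 0 is reserved for the batch size
//  shape_     = {-1, 10}       view of fullShape_ skipping the batch entry
//  variableDims_ = true, productDims_ = -1
//Assuming fullLoc() maps user dimension i to fullShape_ entry i+1 when batching is enabled,
//setBatchSize(5) followed by setShape(0, 7) yields fullShape_ = {5, 7, 10}.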
template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}
//setters
template <typename IO>
void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}
template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check that the dimension index is valid
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
                                            << fullShape_.size() << ")";

  //only variable (-1) dimensions may be changed
  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}
template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}
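//Worked example of the size bookkeeping above (hypothetical numbers): for an FP32 tensor
//(byteSize_ = 4) with shape {7, 10} and batch size 5:
//  sizeShape_        = 7 * 10  = 70    elements per batch entry
//  byteSizePerBatch_ = 4 * 70  = 280   bytes per batch entry
//  totalByteSize_    = 280 * 5 = 1400  bytes requested from the memory resource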
template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}
//create the memory resource if none exists, or replace it if more space is needed
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    //assumed branch conditions: shared memory is only usable with a local server
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in the destructor
      if (memResource_)
        memResource_->close();
      //destroy before constructing the new instance because the shared memory key is reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      if (memResource_)
        memResource_->close();
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //otherwise use a heap resource (remote server); size increases do not matter in that case
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);  //one vector per batch entry
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}
template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //should not be called more than once per event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check that the input matches the expected batch size
  if (data_in.size() != batchSize_)
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;

  //check that the template type matches the tensor data type
  if (byteSize_ != sizeof(DT))
    throw cms::Exception("TritonDataError") << name_ << " toServer(): inconsistent byte size " << sizeof(DT);

  computeSizes();
  updateMem(totalByteSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    //copy each batch entry into the memory resource (assumed TritonMemResource interface)
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }

  //keep the input data in scope until the request is dispatched
  holder_ = ptr;
  done_ = true;
}
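//Hypothetical usage sketch (not part of this file): filling and sending an input from a
//producer's acquire() step. The tensor name "features", the counts, and the client_ member
//are illustrative.
/*
  auto& input = client_->input().at("features");
  input.setShape(0, nRows);
  auto data = input.allocate<float>();      //TritonInputContainer<float>: one vector per batch entry
  for (auto& vdata : *data) {
    vdata.assign(nRows * nFeatures, 0.f);   //fill with the real per-entry values in practice
  }
  input.toServer(data);                     //copies into the memory resource and keeps data alive via holder_
*/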
template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //should not be called more than once per event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  //check that the template type matches the tensor data type
  if (byteSize_ != sizeof(DT))
    throw cms::Exception("TritonDataError") << name_ << " fromServer(): inconsistent byte size " << sizeof(DT);

  //interpret the raw result buffer as the requested type (assumed TritonMemResource interface)
  const uint8_t* r0 = memResource_->copyOutput();
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  //return a non-owning view (edm::Span) of each batch entry
  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    const unsigned offset = i0 * sizeShape_;
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  done_ = true;
  return dataOut;
}
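//Hypothetical usage sketch (not part of this file): reading an output back in produce().
//The tensor name "score" is illustrative; each TritonOutput entry is an edm::Span viewing
//the result buffer, so it should be consumed before the client's buffers are reset.
/*
  const auto& output = client_->output().at("score");
  const auto& scores = output.fromServer<float>();  //TritonOutput<float>: one span per batch entry
  for (const auto& entry : scores) {
    for (float val : entry) {
      //use val
    }
  }
*/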
//referenced declarations (from TritonData.h):
std::shared_ptr<IO> data_
void setBatchSize(unsigned bsize)
std::shared_ptr<TritonInput<DT>> TritonInputContainer
std::shared_ptr<void> holder_
TritonInputContainer<DT> allocate(bool reserve=true)
unsigned fullLoc(unsigned loc) const
void updateMem(size_t size)
bool anyNeg(const ShapeView &vec) const
std::shared_ptr<Result> result_
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
void setShape(const ShapeType &newShape)
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
void createObject(IO **ioptr)
TritonOutput<DT> fromServer() const
triton::client::InferenceServerGrpcClient *client()
std::vector<edm::Span<const DT *>> TritonOutput
std::shared_ptr<TritonMemResource<IO>> memResource_
std::vector<int64_t> ShapeType
void toServer(TritonInputContainer<DT> ptr)