7 #include "model_config.pb.h"
8 #include "model_config.h"
12 namespace ni = nvidia::inferenceserver;
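//dims: kept constant, represents the model's config.pbtxt parameters
//fullShape: if batching is enabled, the first entry is the batch size; values can be modified
//shape: view into fullShape, excluding the batch size entry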
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TritonData<IO>::TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure a unique name for the shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)),
      totalByteSize_(0) {
  //create the wrapped input or output object
  IO* iotmp;
  createObject(&iotmp);
  data_.reset(iotmp);
}
template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
}
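//for comparison, a sketch of the input-side specialization, assuming the standard
//tc::InferInput::Create overload taking name, shape, and datatype:
//  template <>
//  void TritonInputData::createObject(tc::InferInput** ioptr) {
//    tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
//  }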
template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}
//setters
template <typename IO>
void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}
template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check boundary
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
                                            << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}
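//when batching is enabled, the batch size occupies the first entry of fullShape_;
//fullLoc() offsets user-facing dimension indices past it, so the two setters never conflict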
template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}
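//worked example: a fixed shape {10, 4} with FP32 entries (byteSize_ = 4) gives
//sizeShape_ = 40 elements, byteSizePerBatch_ = 160 bytes, totalByteSize_ = 160 * batchSize_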
template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}
//create a memory resource if none exists; otherwise reuse it, resizing if necessary
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //destroy before constructing the new instance because the shared memory key is reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //destroy before constructing the new instance because the shared memory key is reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //for remote servers or the heap fallback, size increases don't require reallocation here
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
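//summary: shared memory (CPU or CUDA) is used only when the server runs on this
//machine; remote servers fall back to a heap buffer that the client library sends over gRPC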
//io accessors
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  //automatically creates a vector for each batch entry
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}
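//typical use (hypothetical names): auto data = input.allocate<float>(); then fill
//(*data)[i] for each batch entry before passing the container to toServer()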
template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //shouldn't be called twice in one event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_)
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;

  //check type
  if (byteSize_ != sizeof(DT))
    throw cms::Exception("TritonDataError") << name_ << " toServer(): inconsistent byte size " << sizeof(DT)
                                            << " (should be " << byteSize_ << " for " << dname_ << ")";

  computeSizes();
  updateMem(totalByteSize_);
  //copy each batch entry into the memory resource at its byte offset
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }

  //keep input data in scope until the request completes
  holder_ = ptr;
  done_ = true;
}
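//note: batch entry i0 lands at byte offset i0 * byteSizePerBatch_, so the memory
//resource holds all entries contiguously in batch-major order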
template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //shouldn't be called twice in one event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  //check type
  if (byteSize_ != sizeof(DT))
    throw cms::Exception("TritonDataError") << name_ << " fromServer(): inconsistent byte size " << sizeof(DT)
                                            << " (should be " << byteSize_ << " for " << dname_ << ")";

  const uint8_t* r0 = memResource_->copyOutput();
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  //one view per batch entry, each spanning sizeShape_ elements
  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    auto offset = i0 * sizeShape_;
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  done_ = true;
  return dataOut;
}
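//end-to-end sketch of the accessors above, using hypothetical producer-side names
//(client, "in0", "out0" are placeholders, not part of this file):
//  auto& input = client->input().at("in0");
//  input.setShape(0, nObjects);
//  auto data = input.allocate<float>();        //one vector per batch entry
//  /* fill (*data)[i] for each batch entry */
//  input.toServer(data);                       //copies into the memory resource
//  /* run inference via the client */
//  const auto& output = client->output().at("out0");
//  const auto& results = output.fromServer<float>();  //one view per batch entry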