6 #include "model_config.pb.h" 7 #include "triton/common/model_config.h" 17 template <
typename IO>
28 dname_(model_info.datatype()),
29 dtype_(tco::ProtocolStringToDataType(dname_)),
30 byteSize_(tco::GetDataTypeByteSize(dtype_)),
template <typename IO>
void TritonData<IO>::addEntry(unsigned entry) {
  //delegate to the client so all inputs and outputs stay in sync
  client_->addEntry(entry);
}
template <typename IO>
void TritonData<IO>::addEntryImpl(unsigned entry) {
  if (entry >= entries_.size()) {
    entries_.reserve(entry + 1);
    for (unsigned i = entries_.size(); i < entry + 1; ++i) {
      entries_.emplace_back(dims_, client_->noOuterDim(), name_, dname_);
    }
  }
  // ...
}
template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname) {
  tc::InferRequestedOutput::Create(ioptr, name);
}
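The corresponding input-side specialization does not survive in this listing. As a sketch only, not verbatim from this file: the Triton client's tc::InferInput::Create, unlike tc::InferRequestedOutput::Create, also takes the dimensions and the datatype string, so the input version presumably looks like this (the real code would pass the current full shape rather than an empty vector):

//sketch (not verbatim from this file): input-side specialization;
//tc::InferInput::Create additionally requires shape and datatype
template <>
void TritonInputData::createObject(tc::InferInput** ioptr, const std::string& name, const std::string& dname) {
  tc::InferInput::Create(ioptr, name, {}, dname);
}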
template <typename IO>
// ...

template <typename IO>
void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape, unsigned entry) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i], entry);
  }
}
template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val, unsigned entry) {
  unsigned locFull = fullLoc(loc);
  //check boundary
  if (locFull >= entries_[entry].fullShape_.size())
    throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
                                            << entries_[entry].fullShape_.size() << ")";
  if (val != entries_[entry].fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      entries_[entry].fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
template <typename IO>
void TritonData<IO>::computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize) {
  sizeShape_ = shapeSize;
  byteSizePerBatch_ = byteSize * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize;
}
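A worked example of this arithmetic, with illustrative numbers: an FP32 tensor (byteSize = 4) whose per-batch shape is {3, 224, 224}, with a batch size of 8:

//shapeSize        = 3 * 224 * 224 = 150528 elements
//byteSizePerBatch = 4 * 150528    = 602112 bytes
//totalByteSize    = 602112 * 8    = 4816896 bytes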
template <typename IO>
// ...

template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  //(re)create the memory resource if more space is needed:
  //CPU shared memory, GPU shared memory (if enabled), or fallback heap memory
  // ...
#ifdef TRITON_ENABLE_GPU
  // ...
#endif
}

template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  //automatically creates one inner vector per batch entry
  auto ptr = std::make_shared<TritonInput<DT>>(client_->batchSize());
  if (reserve) {
    for (auto& entry : entries_) {
      //can only pre-reserve if the shape is fully known
      if (anyNeg(entry.shape_))
        continue;
      for (auto& vec : *ptr) {
        vec.reserve(entry.sizeShape_);
      }
    }
  }
  return ptr;
}
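The typical caller's view of allocate(), following the usage pattern documented for SonicTriton; names and values here are illustrative:

//sketch: one inner vector per batch entry; fill each, then hand the container back
auto data1 = input1.allocate<float>();    //TritonInputContainer<float>
auto& vdata1 = (*data1)[0];               //first batch entry
for (unsigned i = 0; i < nElements; ++i)  //nElements: illustrative
  vdata1.push_back(i * 0.5f);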
template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  unsigned batchSize = client_->batchSize();
  if (data_in.size() != batchSize) {
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize;
  }

  unsigned outerDim = client_->outerDim();
  for (auto& entry : entries_) {
    entry.fullShape_[0] = outerDim;
    // ...
    for (unsigned i0 = 0; i0 < outerDim; ++i0) {
      //copy this batch entry into the memory resource only if nonempty
      if (entry.byteSizePerBatch_ > 0) {
        // ...
      }
    }
  }
  // ...
}
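The matching caller side: toServer() takes the container by value, so the shared_ptr (retained in holder_) keeps the data alive until the inference request completes. A sketch continuing the example above:

//sketch: transfer ownership of the filled container to the client
input1.toServer(data1);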
template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  unsigned outerDim = client_->outerDim();
  TritonOutput<DT> dataOut;
  // ...
  for (const auto& entry : entries_) {
    const DT* r1 = reinterpret_cast<const DT*>(entry.output_);
    if (entry.totalByteSize_ > 0 and !entry.result_) {
      // ...
    }
    for (unsigned i0 = 0; i0 < outerDim; ++i0) {
      //construct a non-owning view over this batch entry's slice of r1
      // ...
    }
  }
  // ...
  return dataOut;
}
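On the output side, the returned TritonOutput<DT> holds one edm::Span view per batch entry over the server's result buffer, so no copy is made. A usage sketch, with an illustrative output name:

//sketch: read back the results as non-owning spans
const auto& output1 = iOutput.at("output__0");
const auto& results = output1.fromServer<float>();
for (unsigned i0 = 0; i0 < results.size(); ++i0) {
  for (float val : results[i0]) {
    // ... use val
  }
}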
template <typename IO>
// ...

template <typename IO>
// ...
Referenced declarations:

std::vector<TritonDataEntry> entries_
int64_t sizeShape(unsigned entry = 0) const
void addEntryImpl(unsigned entry)
std::shared_ptr<void> holder_
unsigned fullLoc(unsigned loc) const
void createObject(IO** ioptr, const std::string& name, const std::string& dname)
const std::string& dname() const
void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize)
void updateMem(size_t size)
IO* data(unsigned entry = 0)
TritonInputContainer<DT> allocate(bool reserve = true)
using TritonOutput = std::vector<edm::Span<const DT*>>
TritonServerType serverType() const
void setShape(const ShapeType& newShape, unsigned entry = 0)
bool anyNeg(const ShapeView& vec) const
TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid)
using TensorMetadata = inference::ModelMetadataResponse_TensorMetadata
using TritonInputContainer = std::shared_ptr<TritonInput<DT>>
TritonOutput<DT> fromServer() const
triton::client::InferenceServerGrpcClient* client()
unsigned batchSize() const
unsigned outerDim() const
std::shared_ptr<TritonMemResource<IO>> memResource_
int64_t dimProduct(const ShapeView& vec) const
static std::atomic<unsigned int> counter
using ShapeType = std::vector<int64_t>
void toServer(TritonInputContainer<DT> ptr)
void addEntry(unsigned entry)