#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "FWCore/Utilities/interface/Exception.h"

#include "model_config.pb.h"
#include "model_config.h"

namespace ni = nvidia::inferenceserver;
namespace tc = triton::client;
//dims_: fixed tensor dimensions from the model configuration (-1 marks a variable dimension)
//fullShape_: includes the leading batch-size entry when batching is enabled; values can be modified
//shape_: view into fullShape_ that excludes the batch-size entry
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure a unique name for the shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)) {
  //create the underlying Triton input/output object (body reconstructed; elided in the original listing)
  IO* ioptr = nullptr;
  createObject(&ioptr);
  data_.reset(ioptr);
}

//(the corresponding InferInput specialization was elided in the original listing)
template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
}
template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}
//set all dimensions at once
template <typename IO>
void TritonData<IO>::setShape(const ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}
//set a single dimension, checking bounds and whether it is allowed to vary
template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check boundary
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
                                            << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}
//cache the sizes derived from the current shape and batch size
template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}
template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}
//create the memory resource if none exists; otherwise reuse it, reallocating only if the requested size grew
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    //branch conditions below are reconstructed; they were elided in the original listing
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in the destructor
      if (memResource_)
        memResource_->close();
      //destroy before constructing the new instance because the shared memory key is reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      //avoid unnecessarily throwing in the destructor
      if (memResource_)
        memResource_->close();
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //for remote servers, or when shared memory is disabled, size increases don't matter: use a heap resource
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
//create the input container: one vector per batch entry, optionally reserving the expected size
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}
//send the input data to the server (via gRPC or shared memory)
template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //should not be called twice for the same event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_)
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;

  //shape update, type check, and memory-resource setup were elided in the original listing
  computeSizes();
  updateMem(totalByteSize_);

  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    //copy each batch entry at its offset in the memory resource (loop body reconstructed)
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }

  //keep the input data in scope until the request completes
  holder_ = ptr;
  done_ = true;
}
//retrieve the output data from the server as non-owning views over the result buffer
template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //should not be called twice for the same event
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  //result and type checks were elided in the original listing
  const uint8_t* r0 = memResource_->copyOutput();  //raw output buffer (retrieval reconstructed)
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    const auto offset = i0 * sizeShape_;
    //each batch entry is an edm::Span view into the buffer, avoiding a copy
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  //(remaining bookkeeping elided in the original listing)
  return dataOut;
}
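//Illustrative usage sketch (not part of the original file): the typical allocate()/toServer()/
//fromServer() round trip as seen from a SonicTriton producer, which receives the input map in
//acquire() and the output map in produce(). The tensor names "input_1" and "output_1", the float
//data type, and the single-entry batch are hypothetical assumptions; the batch size is set on the
//TritonClient beforehand and the inference request is dispatched by the Sonic framework in between.
void fillExampleInput(TritonInputMap& inputs, const std::vector<float>& features) {
  auto& input = inputs.at("input_1");      //hypothetical tensor name
  input.setShape(0, features.size());      //resize the variable dimension for this event
  auto data = input.allocate<float>();     //one inner vector per batch entry
  (*data)[0].assign(features.begin(), features.end());
  input.toServer(data);                    //copies to heap or shared memory and keeps the data alive
}

void readExampleOutput(const TritonOutputMap& outputs) {
  const auto& output = outputs.at("output_1");       //hypothetical tensor name
  const auto& result = output.fromServer<float>();   //one non-owning edm::Span per batch entry
  for (float value : result[0]) {
    //use the output values ...
    (void)value;
  }
}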