CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TritonData.cc
Go to the documentation of this file.
#include "model_config.pb.h"
#include "model_config.h"

#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
11 
12 namespace ni = nvidia::inferenceserver;
13 namespace tc = triton::client;
14 
//dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
//fullShape: if batching is enabled, the first entry is the batch size; values can be modified
//shape: view into fullShape, excluding the batch-size entry
18 template <typename IO>
20  const TritonData<IO>::TensorMetadata& model_info,
22  const std::string& pid)
23  : name_(name),
24  client_(client),
25  useShm_(client_->useSharedMemory()),
26  //ensure unique name for shared memory region
27  shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
28  dims_(model_info.shape().begin(), model_info.shape().end()),
29  noBatch_(client_->noBatch()),
30  batchSize_(0),
31  fullShape_(dims_),
32  shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
33  variableDims_(anyNeg(shape_)),
34  productDims_(variableDims_ ? -1 : dimProduct(shape_)),
35  dname_(model_info.datatype()),
36  dtype_(ni::ProtocolStringToDataType(dname_)),
37  byteSize_(ni::GetDataTypeByteSize(dtype_)),
38  totalByteSize_(0) {
39  //create input or output object
40  IO* iotmp;
41  createObject(&iotmp);
42  data_.reset(iotmp);
43 }
44 
45 template <>
46 void TritonInputData::createObject(tc::InferInput** ioptr) {
47  tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
48 }
49 
50 template <>
51 void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
52  tc::InferRequestedOutput::Create(ioptr, name_);
53  //another specialization for output: can't use shared memory if output size is not known
55 }
56 
57 template <>
59  return "input";
60 }
61 
62 template <>
64  return "output";
65 }
66 
67 template <typename IO>
68 tc::InferenceServerGrpcClient* TritonData<IO>::client() {
69  return client_->client();
70 }
71 
72 //setters
73 template <typename IO>
75  for (unsigned i = 0; i < newShape.size(); ++i) {
76  setShape(i, newShape[i]);
77  }
78 }
79 
80 template <typename IO>
81 void TritonData<IO>::setShape(unsigned loc, int64_t val) {
82  unsigned locFull = fullLoc(loc);
83 
84  //check boundary
85  if (locFull >= fullShape_.size())
86  throw cms::Exception("TritonDataError")
87  << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";
88 
89  if (val != fullShape_[locFull]) {
90  if (dims_[locFull] == -1)
91  fullShape_[locFull] = val;
92  else
93  throw cms::Exception("TritonDataError")
94  << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
95  }
96 }
97 
98 template <typename IO>
99 void TritonData<IO>::setBatchSize(unsigned bsize) {
100  batchSize_ = bsize;
101  if (!noBatch_)
102  fullShape_[0] = batchSize_;
103 }
104 
105 template <typename IO>
107  sizeShape_ = sizeShape();
108  byteSizePerBatch_ = byteSize_ * sizeShape_;
109  totalByteSize_ = byteSizePerBatch_ * batchSize_;
110 }
111 template <typename IO>
113  sizeShape_ = 0;
114  byteSizePerBatch_ = 0;
115  totalByteSize_ = 0;
116 }
117 
118 //create a memory resource if none exists;
119 //otherwise, reuse the memory resource, resizing it if necessary
120 template <typename IO>
122  if (!memResource_ or size > memResource_->size()) {
123  if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
124  //avoid unnecessarily throwing in destructor
125  if (memResource_)
126  memResource_->close();
127  //need to destroy before constructing new instance because shared memory key will be reused
128  memResource_.reset();
129  memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
130  }
131 #ifdef TRITON_ENABLE_GPU
132  else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
133  //avoid unnecessarily throwing in destructor
134  if (memResource_)
135  memResource_->close();
136  //need to destroy before constructing new instance because shared memory key will be reused
137  memResource_.reset();
138  memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
139  }
140 #endif
141  //for remote/heap, size increases don't matter
142  else if (!memResource_)
143  memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
144  }
145 }
146 
147 //io accessors
148 template <>
149 template <typename DT>
151  //automatically creates a vector for each batch entry (if batch size known)
152  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
153  if (reserve and !anyNeg(shape_)) {
154  computeSizes();
155  for (auto& vec : *ptr) {
156  vec.reserve(sizeShape_);
157  }
158  }
159  return ptr;
160 }
161 
162 template <>
163 template <typename DT>
165  //shouldn't be called twice
166  if (done_)
167  throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";
168 
169  const auto& data_in = *ptr;
170 
171  //check batch size
172  if (data_in.size() != batchSize_) {
173  throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
174  << " but specified batch size is " << batchSize_;
175  }
176 
177  //shape must be specified for variable dims or if batch size changes
178  data_->SetShape(fullShape_);
179 
180  if (byteSize_ != sizeof(DT))
181  throw cms::Exception("TritonDataError") << name_ << " toServer(): inconsistent byte size " << sizeof(DT)
182  << " (should be " << byteSize_ << " for " << dname_ << ")";
183 
184  computeSizes();
186  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
187  memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
188  }
189  memResource_->set();
190 
191  //keep input data in scope
192  holder_ = ptr;
193  done_ = true;
194 }
195 
196 //sets up shared memory for outputs, if possible
197 template <>
199  computeSizes();
201  memResource_->set();
202 }
203 
204 template <>
205 template <typename DT>
207  //shouldn't be called twice
208  if (done_)
209  throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";
210 
211  if (!result_) {
212  throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
213  }
214 
215  if (byteSize_ != sizeof(DT)) {
216  throw cms::Exception("TritonDataError") << name_ << " fromServer(): inconsistent byte size " << sizeof(DT)
217  << " (should be " << byteSize_ << " for " << dname_ << ")";
218  }
219 
220  const uint8_t* r0 = memResource_->copyOutput();
221  const DT* r1 = reinterpret_cast<const DT*>(r0);
222 
223  TritonOutput<DT> dataOut;
224  dataOut.reserve(batchSize_);
225  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
226  auto offset = i0 * sizeShape_;
227  dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
228  }
229 
230  done_ = true;
231  return dataOut;
232 }
233 
234 template <>
236  done_ = false;
237  holder_.reset();
238  data_->Reset();
239  //reset shape
240  if (variableDims_) {
241  for (unsigned i = 0; i < shape_.size(); ++i) {
242  unsigned locFull = fullLoc(i);
243  fullShape_[locFull] = dims_[locFull];
244  }
245  }
246  resetSizes();
247 }
248 
249 template <>
251  done_ = false;
252  result_.reset();
253  holder_.reset();
254  resetSizes();
255 }
256 
257 //explicit template instantiation declarations
258 template class TritonData<tc::InferInput>;
260 
263 
266 
std::shared_ptr< IO > data_
Definition: TritonData.h:117
void setBatchSize(unsigned bsize)
Definition: TritonData.cc:99
std::shared_ptr< TritonInput< DT >> TritonInputContainer
Definition: TritonData.h:39
The signals that services can subscribe to. This is based on ActivityRegistry and is current per Service. Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application. Each possible callback has a defined signature, listed with the corresponding AR_WATCH_USING_METHOD_ macro used to subscribe to it.
Definition: Activities.doc:12
std::shared_ptr< void > holder_
Definition: TritonData.h:137
bool done_
Definition: TritonData.h:141
size_t totalByteSize_
Definition: TritonData.h:133
TritonInputContainer< DT > allocate(bool reserve=true)
Definition: TritonData.cc:150
size_t byteSizePerBatch_
Definition: TritonData.h:132
unsigned fullLoc(unsigned loc) const
Definition: TritonData.h:90
void computeSizes()
Definition: TritonData.cc:106
const ShapeType dims_
Definition: TritonData.h:121
std::string xput() const
Definition: TritonData.cc:58
bool useShm_
Definition: TritonData.h:119
ShapeView shape_
Definition: TritonData.h:125
void updateMem(size_t size)
Definition: TritonData.cc:121
bool anyNeg(const ShapeView &vec) const
Definition: TritonData.h:101
int64_t byteSize_
Definition: TritonData.h:130
size_t sizeShape_
Definition: TritonData.h:131
std::shared_ptr< Result > result_
Definition: TritonData.h:139
void reset()
Definition: TritonData.cc:235
IO * data()
Definition: TritonData.h:94
void prepare()
Definition: TritonData.cc:198
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
Definition: TritonData.cc:19
void setShape(const ShapeType &newShape)
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:48
void createObject(IO **ioptr)
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:206
triton::client::InferenceServerGrpcClient * client()
Definition: TritonData.cc:68
void resetSizes()
Definition: TritonData.cc:112
std::vector< edm::Span< const DT * >> TritonOutput
Definition: TritonData.h:35
std::shared_ptr< TritonMemResource< IO > > memResource_
Definition: TritonData.h:138
std::string dname_
Definition: TritonData.h:128
string end
Definition: dataset.py:937
ShapeType fullShape_
Definition: TritonData.h:124
std::string name_
Definition: TritonData.h:116
std::vector< int64_t > ShapeType
Definition: TritonData.h:49
unsigned batchSize_
Definition: TritonData.h:123
bool variableDims_
Definition: TritonData.h:126
void toServer(TritonInputContainer< DT > ptr)
Definition: TritonData.cc:164
auto size() const
Definition: Span.h:24
tuple size
Write out results.