CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes | Private Member Functions | Private Attributes
TritonClient Class Reference

#include <TritonClient.h>

Inheritance diagram for TritonClient:
SonicClient< TritonInputMap, TritonOutputMap > SonicClientBase SonicClientTypes< TritonInputMap, TritonOutputMap >

Classes

struct  ServerSideStats
 

Public Member Functions

TritonBatchMode batchMode () const
 
unsigned batchSize () const
 
bool isLocal () const
 
void reset () override
 
void resetBatchMode ()
 
TritonServerType serverType () const
 
void setBatchMode (TritonBatchMode batchMode)
 
bool setBatchSize (unsigned bsize)
 
void setUseSharedMemory (bool useShm)
 
 TritonClient (const edm::ParameterSet &params, const std::string &debugName)
 
bool useSharedMemory () const
 
bool verbose () const
 
 ~TritonClient () override
 
- Public Member Functions inherited from SonicClient< TritonInputMap, TritonOutputMap >
 SonicClient (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
- Public Member Functions inherited from SonicClientBase
const std::string & clientName () const
 
const std::string & debugName () const
 
virtual void dispatch (edm::WaitingTaskWithArenaHolder holder)
 
virtual void dispatch ()
 
SonicMode mode () const
 
 SonicClientBase (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
virtual ~SonicClientBase ()=default
 
- Public Member Functions inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input & input ()
 
const Output & output () const
 
virtual ~SonicClientTypes ()=default
 

Static Public Member Functions

static void fillPSetDescription (edm::ParameterSetDescription &iDesc)
 
- Static Public Member Functions inherited from SonicClientBase
static void fillBasePSetDescription (edm::ParameterSetDescription &desc, bool allowRetry=true)
 

Protected Member Functions

void evaluate () override
 
void getResults (const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
 
inference::ModelStatistics getServerSideStatus () const
 
template<typename F >
bool handle_exception (F &&call)
 
unsigned nEntries () const
 
bool noOuterDim () const
 
unsigned outerDim () const
 
void reportServerSideStats (const ServerSideStats &stats) const
 
ServerSideStats summarizeServerStats (const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
 
- Protected Member Functions inherited from SonicClientBase
void finish (bool success, std::exception_ptr eptr=std::exception_ptr{})
 
void setMode (SonicMode mode)
 
void start (edm::WaitingTaskWithArenaHolder holder)
 
void start ()
 

Protected Attributes

TritonBatchMode batchMode_
 
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
 
grpc_compression_algorithm compressionAlgo_
 
triton::client::Headers headers_
 
bool isLocal_
 
bool manualBatchMode_
 
unsigned maxOuterDim_
 
unsigned nEntries_
 
bool noOuterDim_
 
std::vector< triton::client::InferOptions > options_
 
unsigned outerDim_
 
TritonServerType serverType_
 
bool useSharedMemory_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientBase
unsigned allowedTries_
 
std::string clientName_
 
std::string debugName_
 
std::unique_ptr< SonicDispatcher > dispatcher_
 
std::string fullDebugName_
 
std::optional< edm::WaitingTaskWithArenaHolder > holder_
 
SonicMode mode_
 
unsigned tries_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input input_
 
Output output_
 

Private Member Functions

void addEntry (unsigned entry)
 
auto client ()
 
void resizeEntries (unsigned entry)
 

Private Attributes

friend TritonInputData
 
friend TritonOutputData
 

Additional Inherited Members

- Public Types inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
typedef TritonInputMap Input
 
typedef TritonOutputMap Output
 

Detailed Description

Definition at line 21 of file TritonClient.h.

Constructor & Destructor Documentation

◆ TritonClient()

TritonClient::TritonClient ( const edm::ParameterSet &  params,
const std::string &  debugName 
)

Definition at line 56 of file TritonClient.cc.

References a, client_, edm::conversion(), SonicClientBase::debugName_, SiPixelPhase1Clusters_cfi::e3, Exception, SonicClientBase::fullDebugName_, edm::FileInPath::fullPath(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, isLocal_, submitPVResolutionJobs::key, LocalCPU, LocalGPU, SiStripPI::max, maxOuterDim_, mps_check::msg, noOuterDim_, heppy_report::oname, options_, or, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, submitPVValidationJobs::params, TritonService::pid(), triton_utils::printColl(), Rectangular, TritonService::serverInfo(), serverType_, setBatchSize(), SonicClientBase::setMode(), AlCaHLTBitMon_QueryRunRegistry::string, summarizeEdmComparisonLogfiles::success, Sync, TRITON_THROW_IF_ERROR, heppy_batch::val, and verbose_.

57  : SonicClient(params, debugName, "TritonClient"),
59  manualBatchMode_(false),
60  verbose_(params.getUntrackedParameter<bool>("verbose")),
61  useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
62  compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))) {
63  options_.emplace_back(params.getParameter<std::string>("modelName"));
64  //get appropriate server for this model
66  const auto& server =
67  ts->serverInfo(options_[0].model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
68  serverType_ = server.type;
69  edm::LogInfo("TritonDiscovery") << debugName_ << " assigned server: " << server.url;
70  //enforce sync mode for fallback CPU server to avoid contention
71  //todo: could enforce async mode otherwise (unless mode was specified by user?)
75 
76  //connect to the server
78  tc::InferenceServerGrpcClient::Create(&client_, server.url, false, server.useSsl, server.sslOptions),
79  "TritonClient(): unable to create inference context",
80  isLocal_);
81 
82  //set options
83  options_[0].model_version_ = params.getParameter<std::string>("modelVersion");
84  options_[0].client_timeout_ = params.getUntrackedParameter<unsigned>("timeout");
85  //convert to microseconds
86  const auto& timeoutUnit = params.getUntrackedParameter<std::string>("timeoutUnit");
87  unsigned conversion = 1;
88  if (timeoutUnit == "seconds")
89  conversion = 1e6;
90  else if (timeoutUnit == "milliseconds")
91  conversion = 1e3;
92  else if (timeoutUnit == "microseconds")
93  conversion = 1;
94  else
95  throw cms::Exception("Configuration") << "Unknown timeout unit: " << timeoutUnit;
96  options_[0].client_timeout_ *= conversion;
97 
98  //get fixed parameters from local config
99  inference::ModelConfig localModelConfig;
100  {
101  const std::string& localModelConfigPath(params.getParameter<edm::FileInPath>("modelConfigPath").fullPath());
102  int fileDescriptor = open(localModelConfigPath.c_str(), O_RDONLY);
103  if (fileDescriptor < 0)
104  throw TritonException("LocalFailure")
105  << "TritonClient(): unable to open local model config: " << localModelConfigPath;
106  google::protobuf::io::FileInputStream localModelConfigInput(fileDescriptor);
107  localModelConfigInput.SetCloseOnDelete(true);
108  if (!google::protobuf::TextFormat::Parse(&localModelConfigInput, &localModelConfig))
109  throw TritonException("LocalFailure")
110  << "TritonClient(): unable to parse local model config: " << localModelConfigPath;
111  }
112 
113  //check batch size limitations (after i/o setup)
114  //triton uses max batch size = 0 to denote a model that does not support native batching (using the outer dimension)
115  //but for models that do support batching (native or otherwise), a given event may set batch size 0 to indicate no valid input is present
116  //so set the local max to 1 and keep track of "no outer dim" case
117  maxOuterDim_ = localModelConfig.max_batch_size();
118  noOuterDim_ = maxOuterDim_ == 0;
120  //propagate batch size
121  setBatchSize(1);
122 
123  //compare model checksums to remote config to enforce versioning
124  inference::ModelConfigResponse modelConfigResponse;
125  TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_[0].model_name_, options_[0].model_version_),
126  "TritonClient(): unable to get model config",
127  isLocal_);
128  inference::ModelConfig remoteModelConfig(modelConfigResponse.config());
129 
130  std::map<std::string, std::array<std::string, 2>> checksums;
131  size_t fileCounter = 0;
132  for (const auto& modelConfig : {localModelConfig, remoteModelConfig}) {
133  const auto& agents = modelConfig.model_repository_agents().agents();
134  auto agent = std::find_if(agents.begin(), agents.end(), [](auto const& a) { return a.name() == "checksum"; });
135  if (agent != agents.end()) {
136  const auto& params = agent->parameters();
137  for (const auto& [key, val] : params) {
138  // only check the requested version
139  if (key.compare(0, options_[0].model_version_.size() + 1, options_[0].model_version_ + "/") == 0)
140  checksums[key][fileCounter] = val;
141  }
142  }
143  ++fileCounter;
144  }
145  std::vector<std::string> incorrect;
146  for (const auto& [key, val] : checksums) {
147  if (checksums[key][0] != checksums[key][1])
148  incorrect.push_back(key);
149  }
150  if (!incorrect.empty())
151  throw TritonException("ModelVersioning") << "The following files have incorrect checksums on the remote server: "
152  << triton_utils::printColl(incorrect, ", ");
153 
154  //get model info
155  inference::ModelMetadataResponse modelMetadata;
156  TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_[0].model_name_, options_[0].model_version_),
157  "TritonClient(): unable to get model metadata",
158  isLocal_);
159 
160  //get input and output (which know their sizes)
161  const auto& nicInputs = modelMetadata.inputs();
162  const auto& nicOutputs = modelMetadata.outputs();
163 
164  //report all model errors at once
165  std::stringstream msg;
166  std::string msg_str;
167 
168  //currently no use case is foreseen for a model with zero inputs or outputs
169  if (nicInputs.empty())
170  msg << "Model on server appears malformed (zero inputs)\n";
171 
172  if (nicOutputs.empty())
173  msg << "Model on server appears malformed (zero outputs)\n";
174 
175  //stop if errors
176  msg_str = msg.str();
177  if (!msg_str.empty())
178  throw cms::Exception("ModelErrors") << msg_str;
179 
180  //setup input map
181  std::stringstream io_msg;
182  if (verbose_)
183  io_msg << "Model inputs: "
184  << "\n";
185  for (const auto& nicInput : nicInputs) {
186  const auto& iname = nicInput.name();
187  auto [curr_itr, success] = input_.emplace(std::piecewise_construct,
188  std::forward_as_tuple(iname),
189  std::forward_as_tuple(iname, nicInput, this, ts->pid()));
190  auto& curr_input = curr_itr->second;
191  if (verbose_) {
192  io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
193  << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n";
194  }
195  }
196 
197  //allow selecting only some outputs from server
198  const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
199  std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
200 
201  //setup output map
202  if (verbose_)
203  io_msg << "Model outputs: "
204  << "\n";
205  for (const auto& nicOutput : nicOutputs) {
206  const auto& oname = nicOutput.name();
207  if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
208  continue;
209  auto [curr_itr, success] = output_.emplace(std::piecewise_construct,
210  std::forward_as_tuple(oname),
211  std::forward_as_tuple(oname, nicOutput, this, ts->pid()));
212  auto& curr_output = curr_itr->second;
213  if (verbose_) {
214  io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
215  << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n";
216  }
217  if (!s_outputs.empty())
218  s_outputs.erase(oname);
219  }
220 
221  //check if any requested outputs were not available
222  if (!s_outputs.empty())
223  throw cms::Exception("MissingOutput")
224  << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs);
225 
226  //print model info
227  std::stringstream model_msg;
228  if (verbose_) {
229  model_msg << "Model name: " << options_[0].model_name_ << "\n"
230  << "Model version: " << options_[0].model_version_ << "\n"
231  << "Model max outer dim: " << (noOuterDim_ ? 0 : maxOuterDim_) << "\n";
232  edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
233  }
234 }
const std::string & pid() const
const std::string & debugName() const
unsigned maxOuterDim_
Definition: TritonClient.h:73
bool setBatchSize(unsigned bsize)
bool noOuterDim_
Definition: TritonClient.h:75
std::string fullPath() const
Definition: FileInPath.cc:161
bool manualBatchMode_
Definition: TritonClient.h:78
bool useSharedMemory_
Definition: TritonClient.h:80
void setMode(SonicMode mode)
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
std::string debugName_
TritonServerType serverType_
Definition: TritonClient.h:81
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
SonicClient(const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
Definition: SonicClient.h:12
key
prepare the HTCondor submission files and eventually submit them
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:83
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
Log< level::Info, false > LogInfo
void conversion(EventAux const &from, EventAuxiliary &to)
Definition: EventAux.cc:9
tuple msg
Definition: mps_check.py:286
Server serverInfo(const std::string &model, const std::string &preferred="") const
double a
Definition: hdecay.h:121
std::string fullDebugName_
TritonBatchMode batchMode_
Definition: TritonClient.h:77
std::string printColl(const C &coll, const std::string &delim=", ")
Definition: triton_utils.cc:10
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88
Definition: server.py:1

◆ ~TritonClient()

TritonClient::~TritonClient ( )
override

Definition at line 236 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

236  {
237  //by default: members of this class destroyed before members of base class
238  //in shared memory case, TritonMemResource (member of TritonData) unregisters from client_ in its destructor
239  //but input/output objects are member of base class, so destroyed after client_ (member of this class)
240  //therefore, clear the maps here
241  input_.clear();
242  output_.clear();
243 }

Member Function Documentation

◆ addEntry()

void TritonClient::addEntry ( unsigned  entry)
private

Definition at line 296 of file TritonClient.cc.

References batchMode_, mps_splice::entry, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, outerDim_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, and Ragged.

Referenced by resizeEntries().

296  {
297  for (auto& element : input_) {
298  element.second.addEntryImpl(entry);
299  }
300  for (auto& element : output_) {
301  element.second.addEntryImpl(entry);
302  }
303  if (entry > 0) {
305  outerDim_ = 1;
306  }
307 }
unsigned outerDim_
Definition: TritonClient.h:74
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ batchMode()

TritonBatchMode TritonClient::batchMode ( ) const
inline

Definition at line 42 of file TritonClient.h.

References batchMode_.

Referenced by setBatchMode().

42 { return batchMode_; }
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ batchSize()

unsigned TritonClient::batchSize ( ) const

◆ client()

auto TritonClient::client ( )
inline private

Definition at line 95 of file TritonClient.h.

References client_.

Referenced by TritonData< IO >::client().

95 { return client_.get(); }
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86

◆ evaluate()

void TritonClient::evaluate ( )
override protected virtual

Implements SonicClientBase.

Definition at line 363 of file TritonClient.cc.

References Async, batchSize(), client_, compressionAlgo_, SonicClientBase::finish(), getResults(), getServerSideStatus(), handle_exception(), headers_, mps_fire::i, ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::if(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, isLocal_, SonicClientBase::mode_, nEntries(), TritonService::notifyCallStatus(), heppy_report::oname, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, reportServerSideStats(), mysort::results, dqmMemoryStats::stats, summarizeEdmComparisonLogfiles::success, summarizeServerStats(), SonicClientBase::tries_, TRITON_THROW_IF_ERROR, and verbose().

363  {
364  //undo previous signal from TritonException
365  if (tries_ > 0) {
367  ts->notifyCallStatus(true);
368  }
369 
370  //in case there is nothing to process
371  if (batchSize() == 0) {
372  //call getResults on an empty vector
373  std::vector<std::shared_ptr<tc::InferResult>> empty_results;
374  getResults(empty_results);
375  finish(true);
376  return;
377  }
378 
379  //set up input pointers for triton (generalized for multi-request ragged batching case)
380  //one vector<InferInput*> per request
381  unsigned nEntriesVal = nEntries();
382  std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntriesVal);
383  for (auto& inputTriton : inputsTriton) {
384  inputTriton.reserve(input_.size());
385  }
386  for (auto& [iname, input] : input_) {
387  for (unsigned i = 0; i < nEntriesVal; ++i) {
388  inputsTriton[i].push_back(input.data(i));
389  }
390  }
391 
392  //set up output pointers similarly
393  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntriesVal);
394  for (auto& outputTriton : outputsTriton) {
395  outputTriton.reserve(output_.size());
396  }
397  for (auto& [oname, output] : output_) {
398  for (unsigned i = 0; i < nEntriesVal; ++i) {
399  outputsTriton[i].push_back(output.data(i));
400  }
401  }
402 
403  //set up shared memory for output
404  auto success = handle_exception([&]() {
405  for (auto& element : output_) {
406  element.second.prepare();
407  }
408  });
409  if (!success)
410  return;
411 
412  // Get the status of the server prior to the request being made.
413  inference::ModelStatistics start_status;
414  success = handle_exception([&]() {
415  if (verbose())
416  start_status = getServerSideStatus();
417  });
418  if (!success)
419  return;
420 
421  if (mode_ == SonicMode::Async) {
422  //non-blocking call
423  success = handle_exception([&]() {
424  TRITON_THROW_IF_ERROR(client_->AsyncInferMulti(
425  [start_status, this](std::vector<tc::InferResult*> resultsTmp) {
426  //immediately convert to shared_ptr
427  const auto& results = convertToShared(resultsTmp);
428  //check results
429  for (auto ptr : results) {
430  auto success = handle_exception([&]() {
431  TRITON_THROW_IF_ERROR(
432  ptr->RequestStatus(), "evaluate(): unable to get result(s)", isLocal_);
433  });
434  if (!success)
435  return;
436  }
437 
438  if (verbose()) {
439  inference::ModelStatistics end_status;
440  auto success = handle_exception([&]() { end_status = getServerSideStatus(); });
441  if (!success)
442  return;
443 
444  const auto& stats = summarizeServerStats(start_status, end_status);
446  }
447 
448  //check result
449  auto success = handle_exception([&]() { getResults(results); });
450  if (!success)
451  return;
452 
453  //finish
454  finish(true);
455  },
456  options_,
457  inputsTriton,
458  outputsTriton,
459  headers_,
461  "evaluate(): unable to launch async run",
462  isLocal_);
463  });
464  if (!success)
465  return;
466  } else {
467  //blocking call
468  std::vector<tc::InferResult*> resultsTmp;
469  success = handle_exception([&]() {
471  client_->InferMulti(&resultsTmp, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_),
472  "evaluate(): unable to run and/or get result",
473  isLocal_);
474  });
475  //immediately convert to shared_ptr
476  const auto& results = convertToShared(resultsTmp);
477  if (!success)
478  return;
479 
480  if (verbose()) {
481  inference::ModelStatistics end_status;
482  success = handle_exception([&]() { end_status = getServerSideStatus(); });
483  if (!success)
484  return;
485 
486  const auto& stats = summarizeServerStats(start_status, end_status);
488  }
489 
491  if (!success)
492  return;
493 
494  finish(true);
495  }
496 }
bool verbose() const
Definition: TritonClient.h:43
void getResults(const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
void notifyCallStatus(bool status) const
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
bool handle_exception(F &&call)
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:83
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
Definition: TritonClient.h:84
unsigned nEntries() const
unsigned batchSize() const
void reportServerSideStats(const ServerSideStats &stats) const
results
Definition: mysort.py:8
Definition: output.py:1
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88
if(threadIdxLocalY==0 &&threadIdxLocalX==0)

◆ fillPSetDescription()

void TritonClient::fillPSetDescription ( edm::ParameterSetDescription &  iDesc)
static

Definition at line 568 of file TritonClient.cc.

References edm::ParameterSetDescription::add(), edm::ParameterSetDescription::addUntracked(), SonicClientBase::fillBasePSetDescription(), edm::ParameterSetDescription::ifValue(), and AlCaHLTBitMon_QueryRunRegistry::string.

Referenced by DeepTauIdSonicProducer::fillDescriptions(), DeepMETSonicProducer::fillDescriptions(), ParticleTransformerAK4SonicJetTagsProducer::fillDescriptions(), ParticleNetSonicJetTagsProducer::fillDescriptions(), SCEnergyCorrectorDRNProducer::fillDescriptions(), and DRNCorrectionProducerT< T >::fillDescriptions().

568  {
569  edm::ParameterSetDescription descClient;
570  fillBasePSetDescription(descClient);
571  descClient.add<std::string>("modelName");
572  descClient.add<std::string>("modelVersion", "");
573  descClient.add<edm::FileInPath>("modelConfigPath");
574  //server parameters should not affect the physics results
575  descClient.addUntracked<std::string>("preferredServer", "");
576  descClient.addUntracked<unsigned>("timeout");
577  descClient.ifValue(edm::ParameterDescription<std::string>("timeoutUnit", "seconds", false),
578  edm::allowedValues<std::string>("seconds", "milliseconds", "microseconds"));
579  descClient.addUntracked<bool>("useSharedMemory", true);
580  descClient.addUntracked<std::string>("compression", "");
581  descClient.addUntracked<std::vector<std::string>>("outputs", {});
582  iDesc.add<edm::ParameterSetDescription>("Client", descClient);
583 }
ParameterDescriptionNode * ifValue(ParameterDescription< T > const &switchParameter, std::unique_ptr< ParameterDescriptionCases< T >> cases)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillBasePSetDescription(edm::ParameterSetDescription &desc, bool allowRetry=true)

◆ getResults()

void TritonClient::getResults ( const std::vector< std::shared_ptr< triton::client::InferResult >> &  results)
protected

Definition at line 340 of file TritonClient.cc.

References mps_fire::i, noOuterDim_, heppy_report::oname, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, mps_fire::result, mysort::results, and TRITON_THROW_IF_ERROR.

Referenced by evaluate().

340  {
341  for (unsigned i = 0; i < results.size(); ++i) {
342  const auto& result = results[i];
343  for (auto& [oname, output] : output_) {
344  //set shape here before output becomes const
345  if (output.variableDims()) {
346  std::vector<int64_t> tmp_shape;
348  result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname, false);
349  if (!noOuterDim_)
350  tmp_shape.erase(tmp_shape.begin());
351  output.setShape(tmp_shape, i);
352  }
353  //extend lifetime
354  output.setResult(result, i);
355  //compute size after getting all result entries
356  if (i == results.size() - 1)
357  output.computeSizes();
358  }
359  }
360 }
bool noOuterDim_
Definition: TritonClient.h:75
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
results
Definition: mysort.py:8
Definition: output.py:1

◆ getServerSideStatus()

inference::ModelStatistics TritonClient::getServerSideStatus ( ) const
protected

Definition at line 556 of file TritonClient.cc.

References client_, isLocal_, options_, TRITON_THROW_IF_ERROR, and verbose_.

Referenced by evaluate().

556  {
557  if (verbose_) {
558  inference::ModelStatisticsResponse resp;
559  TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_[0].model_name_, options_[0].model_version_),
560  "getServerSideStatus(): unable to get model statistics",
561  isLocal_);
562  return *(resp.model_stats().begin());
563  }
564  return inference::ModelStatistics{};
565 }
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88

◆ handle_exception()

template<typename F >
bool TritonClient::handle_exception ( F &&  call)
protected

Definition at line 321 of file TritonClient.cc.

References CMS_SA_ALLOW, MillePedeFileConverter_cfg::e, and SonicClientBase::finish().

Referenced by evaluate().

321  {
322  //caught exceptions will be propagated to edm::WaitingTaskWithArenaHolder
323  CMS_SA_ALLOW try {
324  call();
325  return true;
326  }
327  //TritonExceptions are intended/expected to be recoverable, i.e. retries should be allowed
328  catch (TritonException& e) {
329  e.convertToWarning();
330  finish(false);
331  return false;
332  }
333  //other exceptions are not: execution should stop if they are encountered
334  catch (...) {
335  finish(false, std::current_exception());
336  return false;
337  }
338 }
#define CMS_SA_ALLOW
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})

◆ isLocal()

bool TritonClient::isLocal ( ) const
inline

Definition at line 51 of file TritonClient.h.

References isLocal_.

51 { return isLocal_; }

◆ nEntries()

unsigned TritonClient::nEntries ( ) const
protected

Definition at line 259 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_.

Referenced by batchSize(), evaluate(), and resizeEntries().

259 { return !input_.empty() ? input_.begin()->second.entries_.size() : 0; }

◆ noOuterDim()

bool TritonClient::noOuterDim ( ) const
inline protected

Definition at line 58 of file TritonClient.h.

References noOuterDim_.

Referenced by TritonData< IO >::fullLoc(), and TritonData< IO >::toServer().

58 { return noOuterDim_; }
bool noOuterDim_
Definition: TritonClient.h:75

◆ outerDim()

unsigned TritonClient::outerDim ( ) const
inline protected

Definition at line 59 of file TritonClient.h.

References outerDim_.

Referenced by TritonData< IO >::computeSizes(), TritonData< IO >::fromServer(), and TritonData< IO >::toServer().

59 { return outerDim_; }
unsigned outerDim_
Definition: TritonClient.h:74

◆ reportServerSideStats()

void TritonClient::reportServerSideStats ( const ServerSideStats &  stats) const
protected

Definition at line 498 of file TritonClient.cc.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), submitPVResolutionJobs::count, SonicClientBase::debugName_, SonicClientBase::fullDebugName_, mps_check::msg, and dqmMemoryStats::stats.

Referenced by evaluate().

498  {
499  std::stringstream msg;
500 
501  // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/inference_profiler.cc
502  const uint64_t count = stats.success_count_;
503  msg << " Inference count: " << stats.inference_count_ << "\n";
504  msg << " Execution count: " << stats.execution_count_ << "\n";
505  msg << " Successful request count: " << count << "\n";
506 
507  if (count > 0) {
508  auto get_avg_us = [count](uint64_t tval) {
509  constexpr uint64_t us_to_ns = 1000;
510  return tval / us_to_ns / count;
511  };
512 
513  const uint64_t cumm_avg_us = get_avg_us(stats.cumm_time_ns_);
514  const uint64_t queue_avg_us = get_avg_us(stats.queue_time_ns_);
515  const uint64_t compute_input_avg_us = get_avg_us(stats.compute_input_time_ns_);
516  const uint64_t compute_infer_avg_us = get_avg_us(stats.compute_infer_time_ns_);
517  const uint64_t compute_output_avg_us = get_avg_us(stats.compute_output_time_ns_);
518  const uint64_t compute_avg_us = compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
519  const uint64_t overhead =
520  (cumm_avg_us > queue_avg_us + compute_avg_us) ? (cumm_avg_us - queue_avg_us - compute_avg_us) : 0;
521 
522  msg << " Avg request latency: " << cumm_avg_us << " usec"
523  << "\n"
524  << " (overhead " << overhead << " usec + "
525  << "queue " << queue_avg_us << " usec + "
526  << "compute input " << compute_input_avg_us << " usec + "
527  << "compute infer " << compute_infer_avg_us << " usec + "
528  << "compute output " << compute_output_avg_us << " usec)" << std::endl;
529  }
530 
531  if (!debugName_.empty())
532  edm::LogInfo(fullDebugName_) << msg.str();
533 }
std::string debugName_
unsigned long long uint64_t
Definition: Time.h:13
tuple msg
Definition: mps_check.py:286
std::string fullDebugName_

◆ reset()

void TritonClient::reset ( void  )
override virtual

Reimplemented from SonicClientBase.

Definition at line 309 of file TritonClient.cc.

References batchMode_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, manualBatchMode_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, and Rectangular.

309  {
310  if (!manualBatchMode_)
312  for (auto& element : input_) {
313  element.second.reset();
314  }
315  for (auto& element : output_) {
316  element.second.reset();
317  }
318 }
bool manualBatchMode_
Definition: TritonClient.h:78
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ resetBatchMode()

void TritonClient::resetBatchMode ( )

Definition at line 254 of file TritonClient.cc.

References batchMode_, manualBatchMode_, and Rectangular.

254  {
256  manualBatchMode_ = false;
257 }
bool manualBatchMode_
Definition: TritonClient.h:78
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ resizeEntries()

void TritonClient::resizeEntries ( unsigned  entry)
private

Definition at line 282 of file TritonClient.cc.

References addEntry(), mps_splice::entry, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, nEntries(), and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

Referenced by setBatchSize().

282  {
283  if (entry > nEntries())
284  //addEntry(entry) extends the vector to size entry+1
285  addEntry(entry - 1);
286  else if (entry < nEntries()) {
287  for (auto& element : input_) {
288  element.second.entries_.resize(entry);
289  }
290  for (auto& element : output_) {
291  element.second.entries_.resize(entry);
292  }
293  }
294 }
void addEntry(unsigned entry)
unsigned nEntries() const

◆ serverType()

TritonServerType TritonClient::serverType ( ) const
inline

Definition at line 50 of file TritonClient.h.

References serverType_.

Referenced by TritonData< IO >::updateMem().

50 { return serverType_; }
TritonServerType serverType_
Definition: TritonClient.h:81

◆ setBatchMode()

void TritonClient::setBatchMode ( TritonBatchMode  batchMode)

Definition at line 245 of file TritonClient.cc.

References batchMode(), batchMode_, batchSize(), manualBatchMode_, and setBatchSize().

245  {
246  unsigned oldBatchSize = batchSize();
247  batchMode_ = batchMode;
248  manualBatchMode_ = true;
249  //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth
250  //includes handling of change from ragged to rectangular if multiple entries already created
251  setBatchSize(oldBatchSize);
252 }
bool setBatchSize(unsigned bsize)
bool manualBatchMode_
Definition: TritonClient.h:78
TritonBatchMode batchMode() const
Definition: TritonClient.h:42
unsigned batchSize() const
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ setBatchSize()

bool TritonClient::setBatchSize ( unsigned  bsize)

Definition at line 263 of file TritonClient.cc.

References batchMode_, maxOuterDim_, SiStripPI::min, outerDim_, Rectangular, and resizeEntries().

Referenced by setBatchMode(), and TritonClient().

263  {
264  if (batchMode_ == TritonBatchMode::Rectangular) {
265  if (bsize > maxOuterDim_) {
266  throw TritonException("LocalFailure")
267  << "Requested batch size " << bsize << " exceeds server-specified max batch size " << maxOuterDim_ << ".";
268  return false;
269  } else {
270  outerDim_ = bsize;
271  //take min to allow resizing to 0
272  resizeEntries(std::min(outerDim_, 1u));
273  return true;
274  }
275  } else {
276  resizeEntries(bsize);
277  outerDim_ = 1;
278  return true;
279  }
280 }
unsigned maxOuterDim_
Definition: TritonClient.h:73
void resizeEntries(unsigned entry)
unsigned outerDim_
Definition: TritonClient.h:74
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ setUseSharedMemory()

void TritonClient::setUseSharedMemory ( bool  useShm)
inline

Definition at line 45 of file TritonClient.h.

References useSharedMemory_.

45 { useSharedMemory_ = useShm; }
bool useSharedMemory_
Definition: TritonClient.h:80

◆ summarizeServerStats()

TritonClient::ServerSideStats TritonClient::summarizeServerStats ( const inference::ModelStatistics &  start_status,
const inference::ModelStatistics &  end_status 
) const
protected

Definition at line 535 of file TritonClient.cc.

References TritonClient::ServerSideStats::compute_infer_time_ns_, TritonClient::ServerSideStats::compute_input_time_ns_, TritonClient::ServerSideStats::compute_output_time_ns_, TritonClient::ServerSideStats::cumm_time_ns_, TritonClient::ServerSideStats::execution_count_, TritonClient::ServerSideStats::inference_count_, TritonClient::ServerSideStats::queue_time_ns_, and TritonClient::ServerSideStats::success_count_.

Referenced by evaluate().

536  {
537  TritonClient::ServerSideStats server_stats;
538 
539  server_stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
540  server_stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
541  server_stats.success_count_ =
542  end_status.inference_stats().success().count() - start_status.inference_stats().success().count();
543  server_stats.cumm_time_ns_ =
544  end_status.inference_stats().success().ns() - start_status.inference_stats().success().ns();
545  server_stats.queue_time_ns_ = end_status.inference_stats().queue().ns() - start_status.inference_stats().queue().ns();
546  server_stats.compute_input_time_ns_ =
547  end_status.inference_stats().compute_input().ns() - start_status.inference_stats().compute_input().ns();
548  server_stats.compute_infer_time_ns_ =
549  end_status.inference_stats().compute_infer().ns() - start_status.inference_stats().compute_infer().ns();
550  server_stats.compute_output_time_ns_ =
551  end_status.inference_stats().compute_output().ns() - start_status.inference_stats().compute_output().ns();
552 
553  return server_stats;
554 }

◆ useSharedMemory()

bool TritonClient::useSharedMemory ( ) const
inline

Definition at line 44 of file TritonClient.h.

References useSharedMemory_.

44 { return useSharedMemory_; }
bool useSharedMemory_
Definition: TritonClient.h:80

◆ verbose()

bool TritonClient::verbose ( ) const
inline

Definition at line 43 of file TritonClient.h.

References verbose_.

Referenced by evaluate().

43 { return verbose_; }

Member Data Documentation

◆ batchMode_

TritonBatchMode TritonClient::batchMode_
protected

◆ client_

std::unique_ptr<triton::client::InferenceServerGrpcClient> TritonClient::client_
protected

Definition at line 86 of file TritonClient.h.

Referenced by client(), evaluate(), getServerSideStatus(), and TritonClient().

◆ compressionAlgo_

grpc_compression_algorithm TritonClient::compressionAlgo_
protected

Definition at line 83 of file TritonClient.h.

Referenced by evaluate().

◆ headers_

triton::client::Headers TritonClient::headers_
protected

Definition at line 84 of file TritonClient.h.

Referenced by evaluate().

◆ isLocal_

bool TritonClient::isLocal_
protected

Definition at line 82 of file TritonClient.h.

Referenced by evaluate(), getServerSideStatus(), isLocal(), and TritonClient().

◆ manualBatchMode_

bool TritonClient::manualBatchMode_
protected

Definition at line 78 of file TritonClient.h.

Referenced by reset(), resetBatchMode(), and setBatchMode().

◆ maxOuterDim_

unsigned TritonClient::maxOuterDim_
protected

Definition at line 73 of file TritonClient.h.

Referenced by setBatchSize(), and TritonClient().

◆ nEntries_

unsigned TritonClient::nEntries_
protected

Definition at line 76 of file TritonClient.h.

◆ noOuterDim_

bool TritonClient::noOuterDim_
protected

Definition at line 75 of file TritonClient.h.

Referenced by getResults(), noOuterDim(), and TritonClient().

◆ options_

std::vector<triton::client::InferOptions> TritonClient::options_
protected

◆ outerDim_

unsigned TritonClient::outerDim_
protected

Definition at line 74 of file TritonClient.h.

Referenced by addEntry(), batchSize(), outerDim(), and setBatchSize().

◆ serverType_

TritonServerType TritonClient::serverType_
protected

Definition at line 81 of file TritonClient.h.

Referenced by serverType(), and TritonClient().

◆ TritonInputData

friend TritonClient::TritonInputData
private

Definition at line 91 of file TritonClient.h.

◆ TritonOutputData

friend TritonClient::TritonOutputData
private

Definition at line 92 of file TritonClient.h.

◆ useSharedMemory_

bool TritonClient::useSharedMemory_
protected

Definition at line 80 of file TritonClient.h.

Referenced by setUseSharedMemory(), and useSharedMemory().

◆ verbose_

bool TritonClient::verbose_
protected

Definition at line 79 of file TritonClient.h.

Referenced by getServerSideStatus(), TritonClient(), and verbose().