CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes | Private Member Functions | Private Attributes
TritonClient Class Reference

#include <TritonClient.h>

Inheritance diagram for TritonClient:
SonicClient< TritonInputMap, TritonOutputMap > SonicClientBase SonicClientTypes< TritonInputMap, TritonOutputMap >

Classes

struct  ServerSideStats
 

Public Member Functions

TritonBatchMode batchMode () const
 
unsigned batchSize () const
 
void reset () override
 
void resetBatchMode ()
 
TritonServerType serverType () const
 
void setBatchMode (TritonBatchMode batchMode)
 
bool setBatchSize (unsigned bsize)
 
void setUseSharedMemory (bool useShm)
 
 TritonClient (const edm::ParameterSet &params, const std::string &debugName)
 
bool useSharedMemory () const
 
bool verbose () const
 
 ~TritonClient () override
 
- Public Member Functions inherited from SonicClient< TritonInputMap, TritonOutputMap >
 SonicClient (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
- Public Member Functions inherited from SonicClientBase
const std::string & clientName () const
 
const std::string & debugName () const
 
virtual void dispatch (edm::WaitingTaskWithArenaHolder holder)
 
virtual void dispatch ()
 
SonicMode mode () const
 
 SonicClientBase (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
virtual ~SonicClientBase ()=default
 
- Public Member Functions inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input & input ()
 
const Output & output () const
 
virtual ~SonicClientTypes ()=default
 

Static Public Member Functions

static void fillPSetDescription (edm::ParameterSetDescription &iDesc)
 
- Static Public Member Functions inherited from SonicClientBase
static void fillBasePSetDescription (edm::ParameterSetDescription &desc, bool allowRetry=true)
 

Protected Member Functions

void evaluate () override
 
void getResults (const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
 
inference::ModelStatistics getServerSideStatus () const
 
template<typename F >
bool handle_exception (F &&call)
 
unsigned nEntries () const
 
bool noOuterDim () const
 
unsigned outerDim () const
 
void reportServerSideStats (const ServerSideStats &stats) const
 
ServerSideStats summarizeServerStats (const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
 
- Protected Member Functions inherited from SonicClientBase
void finish (bool success, std::exception_ptr eptr=std::exception_ptr{})
 
void setMode (SonicMode mode)
 
void start (edm::WaitingTaskWithArenaHolder holder)
 
void start ()
 

Protected Attributes

TritonBatchMode batchMode_
 
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
 
grpc_compression_algorithm compressionAlgo_
 
triton::client::Headers headers_
 
bool manualBatchMode_
 
unsigned maxOuterDim_
 
unsigned nEntries_
 
bool noOuterDim_
 
std::vector< triton::client::InferOptions > options_
 
unsigned outerDim_
 
TritonServerType serverType_
 
bool useSharedMemory_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientBase
unsigned allowedTries_
 
std::string clientName_
 
std::string debugName_
 
std::unique_ptr< SonicDispatcher > dispatcher_
 
std::string fullDebugName_
 
std::optional< edm::WaitingTaskWithArenaHolder > holder_
 
SonicMode mode_
 
unsigned tries_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input input_
 
Output output_
 

Private Member Functions

void addEntry (unsigned entry)
 
auto client ()
 
void resizeEntries (unsigned entry)
 

Private Attributes

friend TritonInputData
 
friend TritonOutputData
 

Additional Inherited Members

- Public Types inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
typedef TritonInputMap Input
 
typedef TritonOutputMap Output
 

Detailed Description

Definition at line 21 of file TritonClient.h.

Constructor & Destructor Documentation

◆ TritonClient()

TritonClient::TritonClient ( const edm::ParameterSet &  params,
const std::string &  debugName 
)

Definition at line 55 of file TritonClient.cc.

References a, client_, SonicClientBase::fullDebugName_, edm::FileInPath::fullPath(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, submitPVResolutionJobs::key, LocalCPU, SiStripPI::max, maxOuterDim_, mps_check::msg, noOuterDim_, heppy_report::oname, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, submitPVValidationJobs::params, TritonService::pid(), triton_utils::printColl(), Rectangular, contentValuesFiles::server, TritonService::serverInfo(), serverType_, setBatchSize(), SonicClientBase::setMode(), AlCaHLTBitMon_QueryRunRegistry::string, summarizeEdmComparisonLogfiles::success, Sync, TRITON_THROW_IF_ERROR, heppy_batch::val, and verbose_.

56  : SonicClient(params, debugName, "TritonClient"),
58  manualBatchMode_(false),
59  verbose_(params.getUntrackedParameter<bool>("verbose")),
60  useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
61  compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))) {
62  options_.emplace_back(params.getParameter<std::string>("modelName"));
63  //get appropriate server for this model
65  const auto& server =
66  ts->serverInfo(options_[0].model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
67  serverType_ = server.type;
68  if (verbose_)
69  edm::LogInfo(fullDebugName_) << "Using server: " << server.url;
70  //enforce sync mode for fallback CPU server to avoid contention
71  //todo: could enforce async mode otherwise (unless mode was specified by user?)
74 
75  //connect to the server
77  tc::InferenceServerGrpcClient::Create(&client_, server.url, false, server.useSsl, server.sslOptions),
78  "TritonClient(): unable to create inference context");
79 
80  //set options
81  options_[0].model_version_ = params.getParameter<std::string>("modelVersion");
82  //convert seconds to microseconds
83  options_[0].client_timeout_ = params.getUntrackedParameter<unsigned>("timeout") * 1e6;
84 
85  //get fixed parameters from local config
86  inference::ModelConfig localModelConfig;
87  {
88  const std::string& localModelConfigPath(params.getParameter<edm::FileInPath>("modelConfigPath").fullPath());
89  int fileDescriptor = open(localModelConfigPath.c_str(), O_RDONLY);
90  if (fileDescriptor < 0)
91  throw TritonException("LocalFailure")
92  << "TritonClient(): unable to open local model config: " << localModelConfigPath;
93  google::protobuf::io::FileInputStream localModelConfigInput(fileDescriptor);
94  localModelConfigInput.SetCloseOnDelete(true);
95  if (!google::protobuf::TextFormat::Parse(&localModelConfigInput, &localModelConfig))
96  throw TritonException("LocalFailure")
97  << "TritonClient(): unable to parse local model config: " << localModelConfigPath;
98  }
99 
100  //check batch size limitations (after i/o setup)
101  //triton uses max batch size = 0 to denote a model that does not support native batching (using the outer dimension)
102  //but for models that do support batching (native or otherwise), a given event may set batch size 0 to indicate no valid input is present
103  //so set the local max to 1 and keep track of "no outer dim" case
104  maxOuterDim_ = localModelConfig.max_batch_size();
105  noOuterDim_ = maxOuterDim_ == 0;
107  //propagate batch size
108  setBatchSize(1);
109 
110  //compare model checksums to remote config to enforce versioning
111  inference::ModelConfigResponse modelConfigResponse;
112  TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_[0].model_name_, options_[0].model_version_),
113  "TritonClient(): unable to get model config");
114  inference::ModelConfig remoteModelConfig(modelConfigResponse.config());
115 
116  std::map<std::string, std::array<std::string, 2>> checksums;
117  size_t fileCounter = 0;
118  for (const auto& modelConfig : {localModelConfig, remoteModelConfig}) {
119  const auto& agents = modelConfig.model_repository_agents().agents();
120  auto agent = std::find_if(agents.begin(), agents.end(), [](auto const& a) { return a.name() == "checksum"; });
121  if (agent != agents.end()) {
122  const auto& params = agent->parameters();
123  for (const auto& [key, val] : params) {
124  // only check the requested version
125  if (key.compare(0, options_[0].model_version_.size() + 1, options_[0].model_version_ + "/") == 0)
126  checksums[key][fileCounter] = val;
127  }
128  }
129  ++fileCounter;
130  }
131  std::vector<std::string> incorrect;
132  for (const auto& [key, val] : checksums) {
133  if (checksums[key][0] != checksums[key][1])
134  incorrect.push_back(key);
135  }
136  if (!incorrect.empty())
137  throw TritonException("ModelVersioning") << "The following files have incorrect checksums on the remote server: "
138  << triton_utils::printColl(incorrect, ", ");
139 
140  //get model info
141  inference::ModelMetadataResponse modelMetadata;
142  TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_[0].model_name_, options_[0].model_version_),
143  "TritonClient(): unable to get model metadata");
144 
145  //get input and output (which know their sizes)
146  const auto& nicInputs = modelMetadata.inputs();
147  const auto& nicOutputs = modelMetadata.outputs();
148 
149  //report all model errors at once
150  std::stringstream msg;
151  std::string msg_str;
152 
153  //currently no use case is foreseen for a model with zero inputs or outputs
154  if (nicInputs.empty())
155  msg << "Model on server appears malformed (zero inputs)\n";
156 
157  if (nicOutputs.empty())
158  msg << "Model on server appears malformed (zero outputs)\n";
159 
160  //stop if errors
161  msg_str = msg.str();
162  if (!msg_str.empty())
163  throw cms::Exception("ModelErrors") << msg_str;
164 
165  //setup input map
166  std::stringstream io_msg;
167  if (verbose_)
168  io_msg << "Model inputs: "
169  << "\n";
170  for (const auto& nicInput : nicInputs) {
171  const auto& iname = nicInput.name();
172  auto [curr_itr, success] = input_.emplace(std::piecewise_construct,
173  std::forward_as_tuple(iname),
174  std::forward_as_tuple(iname, nicInput, this, ts->pid()));
175  auto& curr_input = curr_itr->second;
176  if (verbose_) {
177  io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
178  << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n";
179  }
180  }
181 
182  //allow selecting only some outputs from server
183  const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
184  std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
185 
186  //setup output map
187  if (verbose_)
188  io_msg << "Model outputs: "
189  << "\n";
190  for (const auto& nicOutput : nicOutputs) {
191  const auto& oname = nicOutput.name();
192  if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
193  continue;
194  auto [curr_itr, success] = output_.emplace(std::piecewise_construct,
195  std::forward_as_tuple(oname),
196  std::forward_as_tuple(oname, nicOutput, this, ts->pid()));
197  auto& curr_output = curr_itr->second;
198  if (verbose_) {
199  io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
200  << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n";
201  }
202  if (!s_outputs.empty())
203  s_outputs.erase(oname);
204  }
205 
206  //check if any requested outputs were not available
207  if (!s_outputs.empty())
208  throw cms::Exception("MissingOutput")
209  << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs);
210 
211  //print model info
212  std::stringstream model_msg;
213  if (verbose_) {
214  model_msg << "Model name: " << options_[0].model_name_ << "\n"
215  << "Model version: " << options_[0].model_version_ << "\n"
216  << "Model max outer dim: " << (noOuterDim_ ? 0 : maxOuterDim_) << "\n";
217  edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
218  }
219 }
const std::string & pid() const
const std::string & debugName() const
unsigned maxOuterDim_
Definition: TritonClient.h:72
bool setBatchSize(unsigned bsize)
bool noOuterDim_
Definition: TritonClient.h:74
std::string fullPath() const
Definition: FileInPath.cc:161
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
bool manualBatchMode_
Definition: TritonClient.h:77
bool useSharedMemory_
Definition: TritonClient.h:79
void setMode(SonicMode mode)
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:84
TritonServerType serverType_
Definition: TritonClient.h:80
SonicClient(const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
Definition: SonicClient.h:12
key
prepare the HTCondor submission files and eventually submit them
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:81
Log< level::Info, false > LogInfo
tuple msg
Definition: mps_check.py:286
Server serverInfo(const std::string &model, const std::string &preferred="") const
double a
Definition: hdecay.h:121
std::string fullDebugName_
TritonBatchMode batchMode_
Definition: TritonClient.h:76
std::string printColl(const C &coll, const std::string &delim=", ")
Definition: triton_utils.cc:9
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:86

◆ ~TritonClient()

TritonClient::~TritonClient ( )
override

Definition at line 221 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

221  {
222  //by default: members of this class destroyed before members of base class
223  //in shared memory case, TritonMemResource (member of TritonData) unregisters from client_ in its destructor
224  //but input/output objects are member of base class, so destroyed after client_ (member of this class)
225  //therefore, clear the maps here
226  input_.clear();
227  output_.clear();
228 }

Member Function Documentation

◆ addEntry()

void TritonClient::addEntry ( unsigned  entry)
private

Definition at line 281 of file TritonClient.cc.

References batchMode_, mps_splice::entry, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, outerDim_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, and Ragged.

Referenced by resizeEntries().

281  {
282  for (auto& element : input_) {
283  element.second.addEntryImpl(entry);
284  }
285  for (auto& element : output_) {
286  element.second.addEntryImpl(entry);
287  }
288  if (entry > 0) {
290  outerDim_ = 1;
291  }
292 }
unsigned outerDim_
Definition: TritonClient.h:73
TritonBatchMode batchMode_
Definition: TritonClient.h:76

◆ batchMode()

TritonBatchMode TritonClient::batchMode ( ) const
inline

Definition at line 42 of file TritonClient.h.

References batchMode_.

Referenced by setBatchMode().

42 { return batchMode_; }
TritonBatchMode batchMode_
Definition: TritonClient.h:76

◆ batchSize()

unsigned TritonClient::batchSize ( ) const

◆ client()

auto TritonClient::client ( )
inline private

Definition at line 93 of file TritonClient.h.

References client_.

Referenced by TritonData< IO >::client().

93 { return client_.get(); }
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:84

◆ evaluate()

void TritonClient::evaluate ( )
override protected virtual

Implements SonicClientBase.

Definition at line 348 of file TritonClient.cc.

References Async, batchSize(), client_, compressionAlgo_, SonicClientBase::finish(), getResults(), getServerSideStatus(), handle_exception(), headers_, mps_fire::i, ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::if(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, SonicClientBase::mode_, nEntries(), heppy_report::oname, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, reportServerSideStats(), mysort::results, dqmMemoryStats::stats, summarizeEdmComparisonLogfiles::success, summarizeServerStats(), TRITON_THROW_IF_ERROR, and verbose().

348  {
349  //in case there is nothing to process
350  if (batchSize() == 0) {
351  //call getResults on an empty vector
352  std::vector<std::shared_ptr<tc::InferResult>> empty_results;
353  getResults(empty_results);
354  finish(true);
355  return;
356  }
357 
358  //set up input pointers for triton (generalized for multi-request ragged batching case)
359  //one vector<InferInput*> per request
360  unsigned nEntriesVal = nEntries();
361  std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntriesVal);
362  for (auto& inputTriton : inputsTriton) {
363  inputTriton.reserve(input_.size());
364  }
365  for (auto& [iname, input] : input_) {
366  for (unsigned i = 0; i < nEntriesVal; ++i) {
367  inputsTriton[i].push_back(input.data(i));
368  }
369  }
370 
371  //set up output pointers similarly
372  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntriesVal);
373  for (auto& outputTriton : outputsTriton) {
374  outputTriton.reserve(output_.size());
375  }
376  for (auto& [oname, output] : output_) {
377  for (unsigned i = 0; i < nEntriesVal; ++i) {
378  outputsTriton[i].push_back(output.data(i));
379  }
380  }
381 
382  //set up shared memory for output
383  auto success = handle_exception([&]() {
384  for (auto& element : output_) {
385  element.second.prepare();
386  }
387  });
388  if (!success)
389  return;
390 
391  // Get the status of the server prior to the request being made.
392  inference::ModelStatistics start_status;
393  success = handle_exception([&]() {
394  if (verbose())
395  start_status = getServerSideStatus();
396  });
397  if (!success)
398  return;
399 
400  if (mode_ == SonicMode::Async) {
401  //non-blocking call
402  success = handle_exception([&]() {
404  client_->AsyncInferMulti(
405  [start_status, this](std::vector<tc::InferResult*> resultsTmp) {
406  //immediately convert to shared_ptr
407  const auto& results = convertToShared(resultsTmp);
408  //check results
409  for (auto ptr : results) {
410  auto success = handle_exception(
411  [&]() { TRITON_THROW_IF_ERROR(ptr->RequestStatus(), "evaluate(): unable to get result(s)"); });
412  if (!success)
413  return;
414  }
415 
416  if (verbose()) {
417  inference::ModelStatistics end_status;
418  auto success = handle_exception([&]() { end_status = getServerSideStatus(); });
419  if (!success)
420  return;
421 
422  const auto& stats = summarizeServerStats(start_status, end_status);
424  }
425 
426  //check result
427  auto success = handle_exception([&]() { getResults(results); });
428  if (!success)
429  return;
430 
431  //finish
432  finish(true);
433  },
434  options_,
435  inputsTriton,
436  outputsTriton,
437  headers_,
439  "evaluate(): unable to launch async run");
440  });
441  if (!success)
442  return;
443  } else {
444  //blocking call
445  std::vector<tc::InferResult*> resultsTmp;
446  success = handle_exception([&]() {
448  client_->InferMulti(&resultsTmp, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_),
449  "evaluate(): unable to run and/or get result");
450  });
451  //immediately convert to shared_ptr
452  const auto& results = convertToShared(resultsTmp);
453  if (!success)
454  return;
455 
456  if (verbose()) {
457  inference::ModelStatistics end_status;
458  success = handle_exception([&]() { end_status = getServerSideStatus(); });
459  if (!success)
460  return;
461 
462  const auto& stats = summarizeServerStats(start_status, end_status);
464  }
465 
467  if (!success)
468  return;
469 
470  finish(true);
471  }
472 }
bool verbose() const
Definition: TritonClient.h:43
void getResults(const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:84
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
bool handle_exception(F &&call)
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:81
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
Definition: TritonClient.h:82
unsigned nEntries() const
unsigned batchSize() const
void reportServerSideStats(const ServerSideStats &stats) const
results
Definition: mysort.py:8
Definition: output.py:1
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:86
if(threadIdxLocalY==0 &&threadIdxLocalX==0)

◆ fillPSetDescription()

void TritonClient::fillPSetDescription ( edm::ParameterSetDescription &  iDesc)
static

Definition at line 543 of file TritonClient.cc.

References edm::ParameterSetDescription::add(), edm::ParameterSetDescription::addUntracked(), SonicClientBase::fillBasePSetDescription(), and AlCaHLTBitMon_QueryRunRegistry::string.

Referenced by DeepTauIdSonicProducer::fillDescriptions(), DeepMETSonicProducer::fillDescriptions(), ParticleNetSonicJetTagsProducer::fillDescriptions(), SCEnergyCorrectorDRNProducer::fillDescriptions(), and DRNCorrectionProducerT< T >::fillDescriptions().

543  {
544  edm::ParameterSetDescription descClient;
545  fillBasePSetDescription(descClient);
546  descClient.add<std::string>("modelName");
547  descClient.add<std::string>("modelVersion", "");
548  descClient.add<edm::FileInPath>("modelConfigPath");
549  //server parameters should not affect the physics results
550  descClient.addUntracked<std::string>("preferredServer", "");
551  descClient.addUntracked<unsigned>("timeout");
552  descClient.addUntracked<bool>("useSharedMemory", true);
553  descClient.addUntracked<std::string>("compression", "");
554  descClient.addUntracked<std::vector<std::string>>("outputs", {});
555  iDesc.add<edm::ParameterSetDescription>("Client", descClient);
556 }
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillBasePSetDescription(edm::ParameterSetDescription &desc, bool allowRetry=true)

◆ getResults()

void TritonClient::getResults ( const std::vector< std::shared_ptr< triton::client::InferResult >> &  results)
protected

Definition at line 325 of file TritonClient.cc.

References mps_fire::i, noOuterDim_, heppy_report::oname, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, mps_fire::result, mysort::results, and TRITON_THROW_IF_ERROR.

Referenced by evaluate().

325  {
326  for (unsigned i = 0; i < results.size(); ++i) {
327  const auto& result = results[i];
328  for (auto& [oname, output] : output_) {
329  //set shape here before output becomes const
330  if (output.variableDims()) {
331  std::vector<int64_t> tmp_shape;
332  TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape),
333  "getResults(): unable to get output shape for " + oname);
334  if (!noOuterDim_)
335  tmp_shape.erase(tmp_shape.begin());
336  output.setShape(tmp_shape, i);
337  }
338  //extend lifetime
339  output.setResult(result, i);
340  //compute size after getting all result entries
341  if (i == results.size() - 1)
342  output.computeSizes();
343  }
344  }
345 }
bool noOuterDim_
Definition: TritonClient.h:74
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
results
Definition: mysort.py:8
Definition: output.py:1

◆ getServerSideStatus()

inference::ModelStatistics TritonClient::getServerSideStatus ( ) const
protected

Definition at line 532 of file TritonClient.cc.

References client_, options_, TRITON_THROW_IF_ERROR, and verbose_.

Referenced by evaluate().

532  {
533  if (verbose_) {
534  inference::ModelStatisticsResponse resp;
535  TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_[0].model_name_, options_[0].model_version_),
536  "getServerSideStatus(): unable to get model statistics");
537  return *(resp.model_stats().begin());
538  }
539  return inference::ModelStatistics{};
540 }
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:84
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:86

◆ handle_exception()

template<typename F >
bool TritonClient::handle_exception ( F &&  call)
protected

Definition at line 306 of file TritonClient.cc.

References CMS_SA_ALLOW, MillePedeFileConverter_cfg::e, and SonicClientBase::finish().

Referenced by evaluate().

306  {
307  //caught exceptions will be propagated to edm::WaitingTaskWithArenaHolder
308  CMS_SA_ALLOW try {
309  call();
310  return true;
311  }
312  //TritonExceptions are intended/expected to be recoverable, i.e. retries should be allowed
313  catch (TritonException& e) {
314  e.convertToWarning();
315  finish(false);
316  return false;
317  }
318  //other exceptions are not: execution should stop if they are encountered
319  catch (...) {
320  finish(false, std::current_exception());
321  return false;
322  }
323 }
#define CMS_SA_ALLOW
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})

◆ nEntries()

unsigned TritonClient::nEntries ( ) const
protected

Definition at line 244 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_.

Referenced by batchSize(), evaluate(), and resizeEntries().

244 { return !input_.empty() ? input_.begin()->second.entries_.size() : 0; }

◆ noOuterDim()

bool TritonClient::noOuterDim ( ) const
inline protected

Definition at line 57 of file TritonClient.h.

References noOuterDim_.

Referenced by TritonData< IO >::fullLoc(), and TritonData< IO >::toServer().

57 { return noOuterDim_; }
bool noOuterDim_
Definition: TritonClient.h:74

◆ outerDim()

unsigned TritonClient::outerDim ( ) const
inline protected

Definition at line 58 of file TritonClient.h.

References outerDim_.

Referenced by TritonData< IO >::computeSizes(), TritonData< IO >::fromServer(), and TritonData< IO >::toServer().

58 { return outerDim_; }
unsigned outerDim_
Definition: TritonClient.h:73

◆ reportServerSideStats()

void TritonClient::reportServerSideStats ( const ServerSideStats &  stats) const
protected

Definition at line 474 of file TritonClient.cc.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), submitPVResolutionJobs::count, SonicClientBase::debugName_, SonicClientBase::fullDebugName_, mps_check::msg, and dqmMemoryStats::stats.

Referenced by evaluate().

474  {
475  std::stringstream msg;
476 
477  // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/inference_profiler.cc
478  const uint64_t count = stats.success_count_;
479  msg << " Inference count: " << stats.inference_count_ << "\n";
480  msg << " Execution count: " << stats.execution_count_ << "\n";
481  msg << " Successful request count: " << count << "\n";
482 
483  if (count > 0) {
484  auto get_avg_us = [count](uint64_t tval) {
485  constexpr uint64_t us_to_ns = 1000;
486  return tval / us_to_ns / count;
487  };
488 
489  const uint64_t cumm_avg_us = get_avg_us(stats.cumm_time_ns_);
490  const uint64_t queue_avg_us = get_avg_us(stats.queue_time_ns_);
491  const uint64_t compute_input_avg_us = get_avg_us(stats.compute_input_time_ns_);
492  const uint64_t compute_infer_avg_us = get_avg_us(stats.compute_infer_time_ns_);
493  const uint64_t compute_output_avg_us = get_avg_us(stats.compute_output_time_ns_);
494  const uint64_t compute_avg_us = compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
495  const uint64_t overhead =
496  (cumm_avg_us > queue_avg_us + compute_avg_us) ? (cumm_avg_us - queue_avg_us - compute_avg_us) : 0;
497 
498  msg << " Avg request latency: " << cumm_avg_us << " usec"
499  << "\n"
500  << " (overhead " << overhead << " usec + "
501  << "queue " << queue_avg_us << " usec + "
502  << "compute input " << compute_input_avg_us << " usec + "
503  << "compute infer " << compute_infer_avg_us << " usec + "
504  << "compute output " << compute_output_avg_us << " usec)" << std::endl;
505  }
506 
507  if (!debugName_.empty())
508  edm::LogInfo(fullDebugName_) << msg.str();
509 }
std::string debugName_
unsigned long long uint64_t
Definition: Time.h:13
tuple msg
Definition: mps_check.py:286
std::string fullDebugName_

◆ reset()

void TritonClient::reset ( void  )
override virtual

Reimplemented from SonicClientBase.

Definition at line 294 of file TritonClient.cc.

References batchMode_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, manualBatchMode_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, and Rectangular.

294  {
295  if (!manualBatchMode_)
296  batchMode_ = TritonBatchMode::Rectangular;
297  for (auto& element : input_) {
298  element.second.reset();
299  }
300  for (auto& element : output_) {
301  element.second.reset();
302  }
303 }
bool manualBatchMode_
Definition: TritonClient.h:77
TritonBatchMode batchMode_
Definition: TritonClient.h:76

◆ resetBatchMode()

void TritonClient::resetBatchMode ( )

Definition at line 239 of file TritonClient.cc.

References batchMode_, manualBatchMode_, and Rectangular.

239  {
240  batchMode_ = TritonBatchMode::Rectangular;
241  manualBatchMode_ = false;
242 }
bool manualBatchMode_
Definition: TritonClient.h:77
TritonBatchMode batchMode_
Definition: TritonClient.h:76

◆ resizeEntries()

void TritonClient::resizeEntries ( unsigned  entry)
private

Definition at line 267 of file TritonClient.cc.

References addEntry(), mps_splice::entry, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, nEntries(), and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

Referenced by setBatchSize().

267  {
268  if (entry > nEntries())
269  //addEntry(entry) extends the vector to size entry+1
270  addEntry(entry - 1);
271  else if (entry < nEntries()) {
272  for (auto& element : input_) {
273  element.second.entries_.resize(entry);
274  }
275  for (auto& element : output_) {
276  element.second.entries_.resize(entry);
277  }
278  }
279 }
void addEntry(unsigned entry)
unsigned nEntries() const

◆ serverType()

TritonServerType TritonClient::serverType ( ) const
inline

Definition at line 50 of file TritonClient.h.

References serverType_.

Referenced by TritonData< IO >::updateMem().

50 { return serverType_; }
TritonServerType serverType_
Definition: TritonClient.h:80

◆ setBatchMode()

void TritonClient::setBatchMode ( TritonBatchMode  batchMode)

Definition at line 230 of file TritonClient.cc.

References batchMode(), batchMode_, batchSize(), manualBatchMode_, and setBatchSize().

230  {
231  unsigned oldBatchSize = batchSize();
233  manualBatchMode_ = true;
234  //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth
235  //includes handling of change from ragged to rectangular if multiple entries already created
236  setBatchSize(oldBatchSize);
237 }
bool setBatchSize(unsigned bsize)
bool manualBatchMode_
Definition: TritonClient.h:77
TritonBatchMode batchMode() const
Definition: TritonClient.h:42
unsigned batchSize() const
TritonBatchMode batchMode_
Definition: TritonClient.h:76

◆ setBatchSize()

bool TritonClient::setBatchSize ( unsigned  bsize)

Definition at line 248 of file TritonClient.cc.

References batchMode_, SonicClientBase::fullDebugName_, maxOuterDim_, SiStripPI::min, outerDim_, Rectangular, and resizeEntries().

Referenced by setBatchMode(), and TritonClient().

248  {
250  if (bsize > maxOuterDim_) {
251  edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
252  << maxOuterDim_ << ". Batch size will remain as " << outerDim_;
253  return false;
254  } else {
255  outerDim_ = bsize;
256  //take min to allow resizing to 0
258  return true;
259  }
260  } else {
261  resizeEntries(bsize);
262  outerDim_ = 1;
263  return true;
264  }
265 }
unsigned maxOuterDim_
Definition: TritonClient.h:72
void resizeEntries(unsigned entry)
unsigned outerDim_
Definition: TritonClient.h:73
std::string fullDebugName_
Log< level::Warning, false > LogWarning
TritonBatchMode batchMode_
Definition: TritonClient.h:76

◆ setUseSharedMemory()

void TritonClient::setUseSharedMemory ( bool  useShm)
inline

Definition at line 45 of file TritonClient.h.

References useSharedMemory_.

45 { useSharedMemory_ = useShm; }
bool useSharedMemory_
Definition: TritonClient.h:79

◆ summarizeServerStats()

TritonClient::ServerSideStats TritonClient::summarizeServerStats ( const inference::ModelStatistics &  start_status,
const inference::ModelStatistics &  end_status 
) const
protected

Definition at line 511 of file TritonClient.cc.

References TritonClient::ServerSideStats::compute_infer_time_ns_, TritonClient::ServerSideStats::compute_input_time_ns_, TritonClient::ServerSideStats::compute_output_time_ns_, TritonClient::ServerSideStats::cumm_time_ns_, TritonClient::ServerSideStats::execution_count_, TritonClient::ServerSideStats::inference_count_, TritonClient::ServerSideStats::queue_time_ns_, and TritonClient::ServerSideStats::success_count_.

Referenced by evaluate().

512  {
513  TritonClient::ServerSideStats server_stats;
514 
515  server_stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
516  server_stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
517  server_stats.success_count_ =
518  end_status.inference_stats().success().count() - start_status.inference_stats().success().count();
519  server_stats.cumm_time_ns_ =
520  end_status.inference_stats().success().ns() - start_status.inference_stats().success().ns();
521  server_stats.queue_time_ns_ = end_status.inference_stats().queue().ns() - start_status.inference_stats().queue().ns();
522  server_stats.compute_input_time_ns_ =
523  end_status.inference_stats().compute_input().ns() - start_status.inference_stats().compute_input().ns();
524  server_stats.compute_infer_time_ns_ =
525  end_status.inference_stats().compute_infer().ns() - start_status.inference_stats().compute_infer().ns();
526  server_stats.compute_output_time_ns_ =
527  end_status.inference_stats().compute_output().ns() - start_status.inference_stats().compute_output().ns();
528 
529  return server_stats;
530 }

◆ useSharedMemory()

bool TritonClient::useSharedMemory ( ) const
inline

Definition at line 44 of file TritonClient.h.

References useSharedMemory_.

44 { return useSharedMemory_; }
bool useSharedMemory_
Definition: TritonClient.h:79

◆ verbose()

bool TritonClient::verbose ( ) const
inline

Definition at line 43 of file TritonClient.h.

References verbose_.

Referenced by evaluate().

43 { return verbose_; }

Member Data Documentation

◆ batchMode_

TritonBatchMode TritonClient::batchMode_
protected

◆ client_

std::unique_ptr<triton::client::InferenceServerGrpcClient> TritonClient::client_
protected

Definition at line 84 of file TritonClient.h.

Referenced by client(), evaluate(), getServerSideStatus(), and TritonClient().

◆ compressionAlgo_

grpc_compression_algorithm TritonClient::compressionAlgo_
protected

Definition at line 81 of file TritonClient.h.

Referenced by evaluate().

◆ headers_

triton::client::Headers TritonClient::headers_
protected

Definition at line 82 of file TritonClient.h.

Referenced by evaluate().

◆ manualBatchMode_

bool TritonClient::manualBatchMode_
protected

Definition at line 77 of file TritonClient.h.

Referenced by reset(), resetBatchMode(), and setBatchMode().

◆ maxOuterDim_

unsigned TritonClient::maxOuterDim_
protected

Definition at line 72 of file TritonClient.h.

Referenced by setBatchSize(), and TritonClient().

◆ nEntries_

unsigned TritonClient::nEntries_
protected

Definition at line 75 of file TritonClient.h.

◆ noOuterDim_

bool TritonClient::noOuterDim_
protected

Definition at line 74 of file TritonClient.h.

Referenced by getResults(), noOuterDim(), and TritonClient().

◆ options_

std::vector<triton::client::InferOptions> TritonClient::options_
protected

◆ outerDim_

unsigned TritonClient::outerDim_
protected

Definition at line 73 of file TritonClient.h.

Referenced by addEntry(), batchSize(), outerDim(), and setBatchSize().

◆ serverType_

TritonServerType TritonClient::serverType_
protected

Definition at line 80 of file TritonClient.h.

Referenced by serverType(), and TritonClient().

◆ TritonInputData

friend TritonClient::TritonInputData
private

Definition at line 89 of file TritonClient.h.

◆ TritonOutputData

friend TritonClient::TritonOutputData
private

Definition at line 90 of file TritonClient.h.

◆ useSharedMemory_

bool TritonClient::useSharedMemory_
protected

Definition at line 79 of file TritonClient.h.

Referenced by setUseSharedMemory(), and useSharedMemory().

◆ verbose_

bool TritonClient::verbose_
protected

Definition at line 78 of file TritonClient.h.

Referenced by getServerSideStatus(), TritonClient(), and verbose().