
TritonClient.h
#ifndef HeterogeneousCore_SonicTriton_TritonClient
#define HeterogeneousCore_SonicTriton_TritonClient

#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "HeterogeneousCore/SonicCore/interface/SonicClient.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"

#include <map>
#include <memory>
#include <vector>
#include <string>
#include <exception>
#include <unordered_map>

#include "grpc_client.h"
#include "grpc_service.pb.h"

class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
public:
  struct ServerSideStats {
    uint64_t inference_count_;
    uint64_t execution_count_;
    uint64_t success_count_;
    uint64_t cumm_time_ns_;
    uint64_t queue_time_ns_;
    uint64_t compute_input_time_ns_;
    uint64_t compute_infer_time_ns_;
    uint64_t compute_output_time_ns_;
  };

  //constructor
  TritonClient(const edm::ParameterSet& params);

  //accessors
  unsigned batchSize() const { return batchSize_; }
  bool verbose() const { return verbose_; }
  bool setBatchSize(unsigned bsize);
  void reset() override;

  //for fillDescriptions
  static void fillPSetDescription(edm::ParameterSetDescription& iDesc);

protected:
  //helper
  bool getResults(std::shared_ptr<nvidia::inferenceserver::client::InferResult> results);

  void evaluate() override;

  void reportServerSideStats(const ServerSideStats& stats) const;
  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
                                       const inference::ModelStatistics& end_status) const;

  inference::ModelStatistics getServerSideStatus() const;

  //members
  unsigned maxBatchSize_;
  unsigned batchSize_;
  bool noBatch_;
  bool verbose_;

  //IO pointers for triton
  std::vector<nvidia::inferenceserver::client::InferInput*> inputsTriton_;
  std::vector<const nvidia::inferenceserver::client::InferRequestedOutput*> outputsTriton_;

  std::unique_ptr<nvidia::inferenceserver::client::InferenceServerGrpcClient> client_;
  //stores timeout, model name and version
  nvidia::inferenceserver::client::InferOptions options_;
};

#endif
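
The ServerSideStats struct above holds server-side counters and cumulative times in nanoseconds accumulated between two model-statistics snapshots, and reportServerSideStats turns them into a human-readable summary. The standalone sketch below shows one plausible way to average such counters per request. It is not the CMSSW implementation: printAverages and the numbers in main are purely illustrative, and only the field names are taken from the header.

#include <cstdint>
#include <iostream>

// Stand-in for TritonClient::ServerSideStats, reusing the field names from the header.
struct ServerSideStats {
  uint64_t inference_count_;
  uint64_t execution_count_;
  uint64_t success_count_;
  uint64_t cumm_time_ns_;
  uint64_t queue_time_ns_;
  uint64_t compute_input_time_ns_;
  uint64_t compute_infer_time_ns_;
  uint64_t compute_output_time_ns_;
};

// Print per-request averages in microseconds; guards against division by zero
// when no successful requests were recorded.
void printAverages(const ServerSideStats& stats) {
  if (stats.success_count_ == 0) {
    std::cout << "no successful requests recorded\n";
    return;
  }
  const double n = static_cast<double>(stats.success_count_);
  const auto us = [n](uint64_t ns) { return ns / n / 1000.0; };
  std::cout << "inferences: " << stats.inference_count_ << " (in " << stats.execution_count_ << " executions)\n"
            << "avg queue time:          " << us(stats.queue_time_ns_) << " us\n"
            << "avg compute input time:  " << us(stats.compute_input_time_ns_) << " us\n"
            << "avg compute infer time:  " << us(stats.compute_infer_time_ns_) << " us\n"
            << "avg compute output time: " << us(stats.compute_output_time_ns_) << " us\n"
            << "avg total time:          " << us(stats.cumm_time_ns_) << " us\n";
}

int main() {
  // Hypothetical numbers, purely for illustration.
  ServerSideStats stats{100, 25, 100, 5'000'000, 400'000, 600'000, 3'500'000, 500'000};
  printAverages(stats);
}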