CMS 3D CMS Logo

TritonClient.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonClient
2 #define HeterogeneousCore_SonicTriton_TritonClient
3 
9 
10 #include <map>
11 #include <vector>
12 #include <string>
13 #include <exception>
14 #include <unordered_map>
15 
16 #include "grpc_client.h"
17 #include "grpc_service.pb.h"
18 
// TritonClient: a SonicClient specialised on TritonInputMap / TritonOutputMap.
// NOTE(review): this is a rendered listing of the header, not the header itself.
// Original lines 22-29, 33, 46, 49, 69-70 and 83-84 do not appear below; the
// cross-reference index that follows the listing gives their declarations.
19 class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
20 public:
21  struct ServerSideStats {  // fields (original lines 22-29) are not shown in this listing
    // Per the index, all fields are uint64_t: inference_count_, execution_count_,
    // success_count_, cumm_time_ns_, queue_time_ns_, compute_input_time_ns_,
    // compute_infer_time_ns_, compute_output_time_ns_.
30  };
31 
32  //constructor (original line 33 is not shown; the index lists
    //TritonClient(const edm::ParameterSet &params, const std::string &debugName))
34 
35  //destructor
36  ~TritonClient() override;
37 
38  //accessors
39  unsigned batchSize() const { return batchSize_; }  // current value of batchSize_
40  bool verbose() const { return verbose_; }  // current value of verbose_
41  bool useSharedMemory() const { return useSharedMemory_; }  // current value of useSharedMemory_
42  void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; }  // plain assignment, no other effect here
43  bool setBatchSize(unsigned bsize);  // defined in TritonClient.cc; meaning of the bool result is not visible here
44  void reset() override;
45  bool noBatch() const { return noBatch_; }  // current value of noBatch_
    // original line 46 is not shown; the index lists: TritonServerType serverType() const
47 
48  //for fillDescriptions (original line 49 is not shown; the index lists
    //static void fillPSetDescription(edm::ParameterSetDescription &iDesc))
50 
51 protected:
52  //helpers (all defined in TritonClient.cc)
53  void getResults(std::shared_ptr<triton::client::InferResult> results);
54  void evaluate() override;
    // Takes any callable by forwarding reference.
    // NOTE(review): presumably the bool reports whether the call succeeded - confirm in TritonClient.cc
55  template <typename F>
56  bool handle_exception(F&& call);
57 
58  void reportServerSideStats(const ServerSideStats& stats) const;
    // Builds a ServerSideStats from two ModelStatistics snapshots.
    // NOTE(review): presumably an end-minus-start difference - confirm in TritonClient.cc
59  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
60  const inference::ModelStatistics& end_status) const;
61 
62  inference::ModelStatistics getServerSideStatus() const;
63 
64  //members
65  unsigned maxBatchSize_;
66  unsigned batchSize_;
67  bool noBatch_;
68  bool verbose_;
    // original lines 69-70 are not shown; the index lists:
    // bool useSharedMemory_; and TritonServerType serverType_;
71  grpc_compression_algorithm compressionAlgo_;
72  triton::client::Headers headers_;
73 
74  //IO pointers for triton
    //NOTE(review): raw pointers; who owns the pointees is not visible in this header
75  std::vector<triton::client::InferInput*> inputsTriton_;
76  std::vector<const triton::client::InferRequestedOutput*> outputsTriton_;
77 
78  std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;  // owning handle; exposed as a raw pointer via client()
79  //stores timeout, model name and version
80  triton::client::InferOptions options_;
81 
82 private:  // original lines 83-84 are not shown; the index lists friend TritonInputData and friend TritonOutputData
85 
86  //private accessors only used by data
87  auto client() { return client_.get(); }  // raw pointer from client_.get(); client_ keeps ownership
88 };
89 
90 #endif
TritonClient::maxBatchSize_
unsigned maxBatchSize_
Definition: TritonClient.h:65
TritonClient::useSharedMemory_
bool useSharedMemory_
Definition: TritonClient.h:69
TritonClient::ServerSideStats::inference_count_
uint64_t inference_count_
Definition: TritonClient.h:22
TritonClient::evaluate
void evaluate() override
Definition: TritonClient.cc:241
TritonClient::noBatch_
bool noBatch_
Definition: TritonClient.h:67
TritonClient::outputsTriton_
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
Definition: TritonClient.h:76
TritonData.h
TritonClient::TritonInputData
friend TritonInputData
Definition: TritonClient.h:83
TritonClient::noBatch
bool noBatch() const
Definition: TritonClient.h:45
TritonClient::useSharedMemory
bool useSharedMemory() const
Definition: TritonClient.h:41
CalibrationSummaryClient_cfi.params
params
Definition: CalibrationSummaryClient_cfi.py:14
TritonClient::TritonOutputData
friend TritonOutputData
Definition: TritonClient.h:84
TritonClient::ServerSideStats
Definition: TritonClient.h:21
TritonClient::summarizeServerStats
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
Definition: TritonClient.cc:374
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
TritonClient::setBatchSize
bool setBatchSize(unsigned bsize)
Definition: TritonClient.cc:178
bookConverter.results
results
Definition: bookConverter.py:144
SonicClientBase::debugName
const std::string & debugName() const
Definition: SonicClientBase.h:26
TritonClient::~TritonClient
~TritonClient() override
Definition: TritonClient.cc:169
F
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
TritonClient::serverType
TritonServerType serverType() const
Definition: TritonClient.h:46
TritonClient::client_
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
TritonClient::ServerSideStats::compute_infer_time_ns_
uint64_t compute_infer_time_ns_
Definition: TritonClient.h:28
TritonClient::ServerSideStats::compute_input_time_ns_
uint64_t compute_input_time_ns_
Definition: TritonClient.h:27
TritonClient::compressionAlgo_
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:71
TritonClient::client
auto client()
Definition: TritonClient.h:87
ParameterSetDescription.h
TritonService.h
TritonClient::serverType_
TritonServerType serverType_
Definition: TritonClient.h:70
TritonClient::TritonClient
TritonClient(const edm::ParameterSet &params, const std::string &debugName)
Definition: TritonClient.cc:39
TritonClient::inputsTriton_
std::vector< triton::client::InferInput * > inputsTriton_
Definition: TritonClient.h:75
TritonClient::verbose_
bool verbose_
Definition: TritonClient.h:68
SonicClient.h
edm::ParameterSet
Definition: ParameterSet.h:47
TritonClient::reportServerSideStats
void reportServerSideStats(const ServerSideStats &stats) const
Definition: TritonClient.cc:337
TritonClient::ServerSideStats::execution_count_
uint64_t execution_count_
Definition: TritonClient.h:23
TritonClient::ServerSideStats::success_count_
uint64_t success_count_
Definition: TritonClient.h:24
SonicClient
Definition: SonicClient.h:9
dqmMemoryStats.stats
stats
Definition: dqmMemoryStats.py:134
TritonClient::ServerSideStats::compute_output_time_ns_
uint64_t compute_output_time_ns_
Definition: TritonClient.h:29
AlCaHLTBitMon_QueryRunRegistry.string
string string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
TritonClient::batchSize_
unsigned batchSize_
Definition: TritonClient.h:66
TritonClient::handle_exception
bool handle_exception(F &&call)
Definition: TritonClient.cc:206
TritonClient::ServerSideStats::queue_time_ns_
uint64_t queue_time_ns_
Definition: TritonClient.h:26
TritonClient::fillPSetDescription
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
Definition: TritonClient.cc:406
TritonServerType
TritonServerType
Definition: TritonService.h:24
TritonClient::headers_
triton::client::Headers headers_
Definition: TritonClient.h:72
TritonClient::verbose
bool verbose() const
Definition: TritonClient.h:40
cond::uint64_t
unsigned long long uint64_t
Definition: Time.h:13
ParameterSet.h
TritonClient::getServerSideStatus
inference::ModelStatistics getServerSideStatus() const
Definition: TritonClient.cc:395
TritonClient::getResults
void getResults(std::shared_ptr< triton::client::InferResult > results)
Definition: TritonClient.cc:225
TritonClient::reset
void reset() override
Definition: TritonClient.cc:196
TritonClient::batchSize
unsigned batchSize() const
Definition: TritonClient.h:39
TritonClient::ServerSideStats::cumm_time_ns_
uint64_t cumm_time_ns_
Definition: TritonClient.h:25
TritonClient::setUseSharedMemory
void setUseSharedMemory(bool useShm)
Definition: TritonClient.h:42
TritonClient
Definition: TritonClient.h:19
TritonClient::options_
triton::client::InferOptions options_
Definition: TritonClient.h:80