CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TritonClient.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonClient
2 #define HeterogeneousCore_SonicTriton_TritonClient
3 
9 
10 #include <map>
11 #include <vector>
12 #include <string>
13 #include <exception>
14 #include <unordered_map>
15 
16 #include "grpc_client.h"
17 #include "grpc_service.pb.h"
18 
// TritonClient: SonicClient specialization (over TritonInputMap / TritonOutputMap) that holds a
// triton::client::InferenceServerGrpcClient together with batch-size, shared-memory and
// compression settings.
// NOTE(review): this listing is incomplete -- listing lines 22-29, 33, 46, 49, 69-70 and 83-84 are
// not shown. The comments below point to the cross-reference index at the end of this page
// wherever that index names the missing declaration.
19 class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
20 public:
 // Value type produced by summarizeServerStats() and consumed by reportServerSideStats().
 // Its fields (listing lines 22-29) are not shown here.
21  struct ServerSideStats {
30  };
31 
32  // constructor -- declaration (listing line 33) not shown; the index lists
 //   TritonClient(const edm::ParameterSet& params, const std::string& debugName), defined in TritonClient.cc
34 
35  // destructor (declared only; defined outside this header)
36  ~TritonClient() override;
37 
38  // accessors: trivial inline getters/setters over the members declared below
39  unsigned batchSize() const { return batchSize_; }
40  bool verbose() const { return verbose_; }
41  bool useSharedMemory() const { return useSharedMemory_; }
42  void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; }
 // Declared only; the meaning of the bool result is not visible here.
 // NOTE(review): presumably false when bsize is rejected (e.g. exceeds maxBatchSize_) -- confirm in TritonClient.cc
43  bool setBatchSize(unsigned bsize);
44  void reset() override;
45  bool noBatch() const { return noBatch_; }
 // Listing line 46 not shown; the index lists `TritonServerType serverType() const` at TritonClient.h:46.
47 
48  // for fillDescriptions -- declaration (listing line 49) not shown; the index lists
 //   static void fillPSetDescription(edm::ParameterSetDescription& iDesc)
50 
51 protected:
52  // helpers (all declared only; defined outside this header)
53  void getResults(std::shared_ptr<triton::client::InferResult> results);
54  void evaluate() override;
 // Takes an arbitrary callable by forwarding reference and returns bool.
 // NOTE(review): presumably runs `call` and reports whether it completed without throwing -- confirm in the definition
55  template <typename F>
56  bool handle_exception(F&& call);
57 
58  void reportServerSideStats(const ServerSideStats& stats) const;
 // Builds a ServerSideStats from two ModelStatistics snapshots (start and end).
59  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
60  const inference::ModelStatistics& end_status) const;
61 
62  inference::ModelStatistics getServerSideStatus() const;
63 
64  // members
65  unsigned maxBatchSize_;
66  unsigned batchSize_;
67  bool noBatch_;
68  bool verbose_;
 // Listing lines 69-70 not shown; the index lists `bool useSharedMemory_` (TritonClient.h:69)
 // and `TritonServerType serverType_` (TritonClient.h:70).
71  grpc_compression_algorithm compressionAlgo_;
72  triton::client::Headers headers_;
73 
74  // IO pointers for triton (raw pointers)
 // NOTE(review): ownership of the pointees is not visible in this header -- presumably owned elsewhere; confirm
75  std::vector<triton::client::InferInput*> inputsTriton_;
76  std::vector<const triton::client::InferRequestedOutput*> outputsTriton_;
77 
78  std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;
79  // stores timeout, model name and version
80  triton::client::InferOptions options_;
81 
82 private:
 // Listing lines 83-84 not shown; the index lists `friend TritonInputData` (TritonClient.h:83)
 // and `friend TritonOutputData` (TritonClient.h:84).
85 
86  // private accessor only used by the data classes; returns the raw pointer held by client_
 // (client_ keeps ownership)
87  auto client() { return client_.get(); }
88 };
89 
90 #endif
bool setBatchSize(unsigned bsize)
~TritonClient() override
bool noBatch() const
Definition: TritonClient.h:45
inference::ModelStatistics getServerSideStatus() const
dictionary results
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
Definition: TritonClient.h:76
bool useSharedMemory_
Definition: TritonClient.h:69
unsigned batchSize() const
Definition: TritonClient.h:39
friend TritonOutputData
Definition: TritonClient.h:84
const std::string & debugName() const
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
unsigned maxBatchSize_
Definition: TritonClient.h:65
TritonClient(const edm::ParameterSet &params, const std::string &debugName)
Definition: TritonClient.cc:39
friend TritonInputData
Definition: TritonClient.h:83
void reportServerSideStats(const ServerSideStats &stats) const
TritonServerType serverType_
Definition: TritonClient.h:70
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
bool handle_exception(F &&call)
unsigned batchSize_
Definition: TritonClient.h:66
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:71
auto client()
Definition: TritonClient.h:87
triton::client::Headers headers_
Definition: TritonClient.h:72
unsigned long long uint64_t
Definition: Time.h:13
std::vector< triton::client::InferInput * > inputsTriton_
Definition: TritonClient.h:75
bool useSharedMemory() const
Definition: TritonClient.h:41
bool verbose() const
Definition: TritonClient.h:40
void setUseSharedMemory(bool useShm)
Definition: TritonClient.h:42
triton::client::InferOptions options_
Definition: TritonClient.h:80
void evaluate() override
void getResults(std::shared_ptr< triton::client::InferResult > results)
void reset() override
TritonServerType
Definition: TritonService.h:25
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
TritonServerType serverType() const
Definition: TritonClient.h:46