CMS 3D CMS Logo

TritonClient.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonClient
2 #define HeterogeneousCore_SonicTriton_TritonClient
3 
9 
10 #include <map>
11 #include <vector>
12 #include <string>
13 #include <exception>
14 #include <unordered_map>
15 
16 #include "grpc_client.h"
17 #include "grpc_service.pb.h"
18 
// Batch mode selector; this is the type returned by TritonClient::batchMode().
// The enumerators carry explicit values: Rectangular = 1, Ragged = 2.
// NOTE(review): presumably Rectangular means every entry in a batch shares one shape
// and Ragged allows per-entry shapes -- confirm against the implementation file.
19 enum class TritonBatchMode { Rectangular = 1, Ragged = 2 };
20 
// TritonClient: a SonicClient specialised on TritonInputMap / TritonOutputMap that holds
// a triton::client::InferenceServerGrpcClient (client_) and a list of InferOptions (options_).
//
// NOTE(review): this is a rendered listing whose embedded line numbers skip
// (e.g. 23->32, 34->36, 46->48, 73->75, 88->91). Declarations on the skipped lines are
// not shown here; in particular noOuterDim_, batchMode_ and useSharedMemory_, which the
// inline accessors below read, are declared on lines that are not visible.
21 class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
22 public:
  // Statistics record consumed by reportServerSideStats() and produced by
  // summarizeServerStats(). Its fields (listing lines 24-31) are not shown.
23  struct ServerSideStats {
32  };
33 
  // NOTE(review): the constructor declaration (listing line 35) is not shown. The
  // cross-reference index of this page lists it as
  // TritonClient(const edm::ParameterSet& params, const std::string& debugName).
34  //constructor
36 
37  //destructor
38  ~TritonClient() override;
39 
40  //accessors
  // batchSize() is defined out of line; what it computes cannot be told from this header.
41  unsigned batchSize() const;
  // Returns the stored batchMode_.
42  TritonBatchMode batchMode() const { return batchMode_; }
  // Returns the stored verbose_ flag.
43  bool verbose() const { return verbose_; }
  // Returns the stored useSharedMemory_ flag.
44  bool useSharedMemory() const { return useSharedMemory_; }
  // Overwrites useSharedMemory_ with useShm; nothing else is touched here.
45  void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; }
  // Defined out of line. NOTE(review): the bool result presumably reports whether the
  // requested size was accepted -- confirm in the implementation file.
46  bool setBatchSize(unsigned bsize);
  // NOTE(review): listing line 47 is not shown; the index lists
  // setBatchMode(TritonBatchMode batchMode), which may be the declaration that lives there.
48  void resetBatchMode();
  // Overrides a virtual reset() inherited from the base class.
49  void reset() override;
  // NOTE(review): listing line 50 is not shown; the index places
  // TritonServerType serverType() const at TritonClient.h:50.
51 
  // NOTE(review): listing line 53 is not shown; the index lists
  // static void fillPSetDescription(edm::ParameterSetDescription& iDesc), which is
  // presumably the declaration this comment introduces -- confirm.
52  //for fillDescriptions
54 
55 protected:
56  //helpers
  // Returns the stored noOuterDim_ flag.
57  bool noOuterDim() const { return noOuterDim_; }
  // Returns the stored outerDim_ value.
58  unsigned outerDim() const { return outerDim_; }
59  unsigned nEntries() const;
  // Takes the inference results as a vector of shared_ptr<InferResult>, by const reference.
60  void getResults(const std::vector<std::shared_ptr<triton::client::InferResult>>& results);
  // Overrides a virtual evaluate() inherited from the base class.
61  void evaluate() override;
  // Accepts any callable through a forwarding reference and returns bool.
  // NOTE(review): presumably runs `call` and reports success or failure -- the definition
  // is not in this listing, confirm there.
62  template <typename F>
63  bool handle_exception(F&& call);
64 
65  void reportServerSideStats(const ServerSideStats& stats) const;
  // Builds a ServerSideStats from two ModelStatistics snapshots (start_status, end_status).
66  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
67  const inference::ModelStatistics& end_status) const;
68 
69  inference::ModelStatistics getServerSideStatus() const;
70 
  // NOTE(review): listing lines 74, 76, 77, 79 and 80 are not shown; the index places
  // noOuterDim_, batchMode_, manualBatchMode_, useSharedMemory_ and serverType_ there.
71  //members
72  unsigned maxOuterDim_;
73  unsigned outerDim_;
75  unsigned nEntries_;
78  bool verbose_;
81  grpc_compression_algorithm compressionAlgo_;
82  triton::client::Headers headers_;
83 
  // The gRPC client is owned by this object through a unique_ptr.
84  std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;
85  //stores timeout, model name and version
86  std::vector<triton::client::InferOptions> options_;
87 
88 private:
  // NOTE(review): listing lines 89-90 are not shown; the index places
  // `friend TritonInputData` and `friend TritonOutputData` there, which would be the
  // "data" classes the comment below refers to.
91 
92  //private accessors only used by data
  // Hands out the raw pointer held by client_ (unique_ptr::get()); ownership stays with client_.
93  auto client() { return client_.get(); }
94  void addEntry(unsigned entry);
95  void resizeEntries(unsigned entry);
96 };
97 
98 #endif
bool verbose() const
Definition: TritonClient.h:43
bool noOuterDim() const
Definition: TritonClient.h:57
void getResults(const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
const std::string & debugName() const
unsigned maxOuterDim_
Definition: TritonClient.h:72
bool setBatchSize(unsigned bsize)
bool noOuterDim_
Definition: TritonClient.h:74
~TritonClient() override
void addEntry(unsigned entry)
bool manualBatchMode_
Definition: TritonClient.h:77
bool useSharedMemory_
Definition: TritonClient.h:79
friend TritonOutputData
Definition: TritonClient.h:90
TritonBatchMode batchMode() const
Definition: TritonClient.h:42
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:84
TritonClient(const edm::ParameterSet &params, const std::string &debugName)
Definition: TritonClient.cc:55
friend TritonInputData
Definition: TritonClient.h:89
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
TritonBatchMode
Definition: TritonClient.h:19
void resetBatchMode()
TritonServerType serverType_
Definition: TritonClient.h:80
bool handle_exception(F &&call)
unsigned nEntries_
Definition: TritonClient.h:75
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:81
TritonServerType serverType() const
Definition: TritonClient.h:50
auto client()
Definition: TritonClient.h:93
void resizeEntries(unsigned entry)
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
Definition: TritonClient.h:82
unsigned nEntries() const
unsigned long long uint64_t
Definition: Time.h:13
void setUseSharedMemory(bool useShm)
Definition: TritonClient.h:45
unsigned outerDim_
Definition: TritonClient.h:73
unsigned batchSize() const
unsigned outerDim() const
Definition: TritonClient.h:58
void evaluate() override
void setBatchMode(TritonBatchMode batchMode)
void reportServerSideStats(const ServerSideStats &stats) const
void reset() override
TritonServerType
Definition: TritonService.h:28
bool useSharedMemory() const
Definition: TritonClient.h:44
results
Definition: mysort.py:8
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
TritonBatchMode batchMode_
Definition: TritonClient.h:76
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:86