CMS 3D CMS Logo

TritonClient.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonClient
2 #define HeterogeneousCore_SonicTriton_TritonClient
3 
9 
10 #include <map>
11 #include <vector>
12 #include <string>
13 #include <exception>
14 #include <unordered_map>
15 
16 #include "grpc_client.h"
17 #include "grpc_service.pb.h"
18 
19 enum class TritonBatchMode { Rectangular = 1, Ragged = 2 };  // batch-mode selector returned by TritonClient::batchMode(); NOTE(review): presumably Rectangular = one shared shape per batch and Ragged = per-entry shapes -- confirm against TritonClient.cc
20 
21 class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {  // SonicClient specialization holding a Triton gRPC client (client_) plus per-request options (options_)
22 public:
23  struct ServerSideStats {  // fields (lines 24-31) are not shown in this listing; consumed by reportServerSideStats() and produced by summarizeServerStats()
32  };
33 
34  //constructor -- declaration (line 35) is not shown in this listing; the index below records it as
34  //TritonClient(const edm::ParameterSet &params, const std::string &debugName), defined at TritonClient.cc:56
36 
37  //destructor (overrides the base-class destructor; defined out of line)
38  ~TritonClient() override;
39 
40  //accessors -- lines 47 and 50 are not shown in this listing; the index below records them as
40  //setBatchMode(TritonBatchMode batchMode) and serverType() const (TritonClient.h:50)
41  unsigned batchSize() const;
42  TritonBatchMode batchMode() const { return batchMode_; }  // returns the stored batchMode_
43  bool verbose() const { return verbose_; }  // returns the stored verbose_ flag
44  bool useSharedMemory() const { return useSharedMemory_; }  // returns the stored useSharedMemory_ flag
45  void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; }  // overwrites useSharedMemory_ with useShm; nothing else is touched here
46  bool setBatchSize(unsigned bsize);  // defined out of line; NOTE(review): meaning of the bool result is not visible here -- presumably success/failure, confirm in TritonClient.cc
48  void resetBatchMode();
49  void reset() override;
51  bool isLocal() const { return isLocal_; }  // returns the stored isLocal_ flag
52 
53  //for fillDescriptions -- declaration (line 54) is not shown in this listing; presumably the
53  //static fillPSetDescription(edm::ParameterSetDescription &iDesc) recorded in the index below
55 
56 protected:
57  //helpers (available to derived classes)
58  bool noOuterDim() const { return noOuterDim_; }  // returns the stored noOuterDim_ flag
59  unsigned outerDim() const { return outerDim_; }  // returns the stored outerDim_
60  unsigned nEntries() const;
61  void getResults(const std::vector<std::shared_ptr<triton::client::InferResult>>& results);  // takes the shared InferResult handles by const reference
62  void evaluate() override;
63  template <typename F>
64  bool handle_exception(F&& call);  // takes any callable by forwarding reference; definition is not in this listing
65 
66  void reportServerSideStats(const ServerSideStats& stats) const;
67  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
68  const inference::ModelStatistics& end_status) const;  // builds a ServerSideStats from two ModelStatistics snapshots (start and end)
69 
70  inference::ModelStatistics getServerSideStatus() const;
71 
72  //members -- lines 75, 77, 78, 80 and 81 are not shown in this listing; the index below records them as
72  //bool noOuterDim_, TritonBatchMode batchMode_, bool manualBatchMode_, bool useSharedMemory_, TritonServerType serverType_
73  unsigned maxOuterDim_;
74  unsigned outerDim_;
76  unsigned nEntries_;
79  bool verbose_;
82  bool isLocal_;
83  grpc_compression_algorithm compressionAlgo_;
84  triton::client::Headers headers_;
85 
86  std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;  // owning handle to the gRPC client
87  //stores timeout, model name and version
88  std::vector<triton::client::InferOptions> options_;
89 
90 private:
93 
94  //private accessors only used by data -- the friend declarations (lines 91-92) are not shown in this
94  //listing; the index below records them as friend TritonInputData and friend TritonOutputData
95  auto client() { return client_.get(); }  // non-owning raw pointer to the object held by client_
96  void addEntry(unsigned entry);
97  void resizeEntries(unsigned entry);
98 };
99 
100 #endif
bool verbose() const
Definition: TritonClient.h:43
bool noOuterDim() const
Definition: TritonClient.h:58
void getResults(const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
const std::string & debugName() const
unsigned maxOuterDim_
Definition: TritonClient.h:73
bool setBatchSize(unsigned bsize)
bool noOuterDim_
Definition: TritonClient.h:75
~TritonClient() override
void addEntry(unsigned entry)
bool manualBatchMode_
Definition: TritonClient.h:78
bool useSharedMemory_
Definition: TritonClient.h:80
friend TritonOutputData
Definition: TritonClient.h:92
TritonBatchMode batchMode() const
Definition: TritonClient.h:42
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
TritonClient(const edm::ParameterSet &params, const std::string &debugName)
Definition: TritonClient.cc:56
friend TritonInputData
Definition: TritonClient.h:91
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
TritonBatchMode
Definition: TritonClient.h:19
void resetBatchMode()
TritonServerType serverType_
Definition: TritonClient.h:81
bool handle_exception(F &&call)
unsigned nEntries_
Definition: TritonClient.h:76
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:83
TritonServerType serverType() const
Definition: TritonClient.h:50
auto client()
Definition: TritonClient.h:95
void resizeEntries(unsigned entry)
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
Definition: TritonClient.h:84
unsigned nEntries() const
bool isLocal() const
Definition: TritonClient.h:51
unsigned long long uint64_t
Definition: Time.h:13
void setUseSharedMemory(bool useShm)
Definition: TritonClient.h:45
unsigned outerDim_
Definition: TritonClient.h:74
unsigned batchSize() const
unsigned outerDim() const
Definition: TritonClient.h:59
void evaluate() override
void setBatchMode(TritonBatchMode batchMode)
void reportServerSideStats(const ServerSideStats &stats) const
void reset() override
TritonServerType
Definition: TritonService.h:29
bool useSharedMemory() const
Definition: TritonClient.h:44
results
Definition: mysort.py:8
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
Definition: blowfish.cc:163
TritonBatchMode batchMode_
Definition: TritonClient.h:77
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88