CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TritonData.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonData
2 #define HeterogeneousCore_SonicTriton_TritonData
3 
6 
7 #include <vector>
8 #include <string>
9 #include <unordered_map>
10 #include <numeric>
11 #include <algorithm>
12 #include <memory>
13 #include <atomic>
14 
15 #include "grpc_client.h"
16 #include "grpc_service.pb.h"
17 
18 //forward declaration
19 class TritonClient;
20 template <typename IO>
22 template <typename IO>
24 template <typename IO>
26 #ifdef TRITON_ENABLE_GPU
27 template <typename IO>
28 class TritonGpuShmResource;
29 #endif
30 
31 //aliases for local input and output types
32 template <typename DT>
33 using TritonInput = std::vector<std::vector<DT>>;
34 template <typename DT>
35 using TritonOutput = std::vector<edm::Span<const DT*>>;
36 
37 //other useful typedefs
38 template <typename DT>
39 using TritonInputContainer = std::shared_ptr<TritonInput<DT>>;
40 
41 //store all the info needed for triton input and output
42 //NOTE: this class is not const-thread-safe, and should only be used with stream or one modules
43 //(generally recommended for SONIC, but especially necessary here)
44 template <typename IO>
45 class TritonData {
46 public:
47  using Result = triton::client::InferResult;
48  using TensorMetadata = inference::ModelMetadataResponse_TensorMetadata;
49  using ShapeType = std::vector<int64_t>;
51 
52  //constructor
53  TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid);
54 
55  //some members can be modified
56  void setShape(const ShapeType& newShape);
57  void setShape(unsigned loc, int64_t val);
58 
59  //io accessors
60  template <typename DT>
61  TritonInputContainer<DT> allocate(bool reserve = true);
62  template <typename DT>
64  void prepare();
65  template <typename DT>
67 
68  //const accessors
69  const ShapeView& shape() const { return shape_; }
70  int64_t byteSize() const { return byteSize_; }
71  const std::string& dname() const { return dname_; }
72  unsigned batchSize() const { return batchSize_; }
73 
74  //utilities
75  bool variableDims() const { return variableDims_; }
76  int64_t sizeDims() const { return productDims_; }
77  //default to dims if shape isn't filled
78  int64_t sizeShape() const { return variableDims_ ? dimProduct(shape_) : sizeDims(); }
79 
80 private:
81  friend class TritonClient;
82  friend class TritonMemResource<IO>;
83  friend class TritonHeapResource<IO>;
84  friend class TritonCpuShmResource<IO>;
85 #ifdef TRITON_ENABLE_GPU
86  friend class TritonGpuShmResource<IO>;
87 #endif
88 
89  //private accessors only used internally or by client
90  unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); }
91  void setBatchSize(unsigned bsize);
92  void reset();
93  void setResult(std::shared_ptr<Result> result) { result_ = result; }
94  IO* data() { return data_.get(); }
95  void updateMem(size_t size);
96  void computeSizes();
97  void resetSizes();
98  triton::client::InferenceServerGrpcClient* client();
99 
100  //helpers
101  bool anyNeg(const ShapeView& vec) const {
102  return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; });
103  }
104  int64_t dimProduct(const ShapeView& vec) const {
105  return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies<int64_t>());
106  }
107  void createObject(IO** ioptr);
108  //generates a unique id number for each instance of the class: every call returns the next value of a static atomic counter
109  unsigned uid() const {
110  static std::atomic<unsigned> uid{0};
111  return ++uid;
112  }
113  std::string xput() const;
114 
115  //members
117  std::shared_ptr<IO> data_;
119  bool useShm_;
122  bool noBatch_;
123  unsigned batchSize_;
127  int64_t productDims_;
129  inference::DataType dtype_;
130  int64_t byteSize_;
131  size_t sizeShape_;
134  //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput():
135  //TritonMemResource holds a non-const pointer to an instance of this class
136  //so that TritonOutputGpuShmResource can store data here
137  std::shared_ptr<void> holder_;
138  std::shared_ptr<TritonMemResource<IO>> memResource_;
139  std::shared_ptr<Result> result_;
140  //can be modified in otherwise-const fromServer() method to prevent multiple calls
141  CMS_SA_ALLOW mutable bool done_{};
142 };
143 
145 using TritonInputMap = std::unordered_map<std::string, TritonInputData>;
147 using TritonOutputMap = std::unordered_map<std::string, TritonOutputData>;
148 
149 //avoid "explicit specialization after instantiation" error
150 template <>
152 template <>
154 template <>
155 template <typename DT>
157 template <>
158 template <typename DT>
159 void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr);
160 template <>
162 template <>
163 template <typename DT>
165 template <>
167 template <>
169 template <>
170 void TritonInputData::createObject(triton::client::InferInput** ioptr);
171 template <>
172 void TritonOutputData::createObject(triton::client::InferRequestedOutput** ioptr);
173 
174 //explicit template instantiation declarations
175 extern template class TritonData<triton::client::InferInput>;
177 
178 #endif
std::shared_ptr< IO > data_
Definition: TritonData.h:117
void setBatchSize(unsigned bsize)
Definition: TritonData.cc:99
T begin() const
Definition: Span.h:20
#define CMS_SA_ALLOW
std::shared_ptr< TritonInput< DT >> TritonInputContainer
Definition: TritonData.h:39
int64_t dimProduct(const ShapeView &vec) const
Definition: TritonData.h:104
std::shared_ptr< void > holder_
Definition: TritonData.h:137
bool done_
Definition: TritonData.h:141
size_t totalByteSize_
Definition: TritonData.h:133
TritonInputContainer< DT > allocate(bool reserve=true)
Definition: TritonData.cc:150
T end() const
Definition: Span.h:21
size_t byteSizePerBatch_
Definition: TritonData.h:132
unsigned fullLoc(unsigned loc) const
Definition: TritonData.h:90
void computeSizes()
Definition: TritonData.cc:106
const ShapeType dims_
Definition: TritonData.h:121
void setResult(std::shared_ptr< Result > result)
Definition: TritonData.h:93
std::string xput() const
Definition: TritonData.cc:58
bool useShm_
Definition: TritonData.h:119
ShapeView shape_
Definition: TritonData.h:125
int64_t sizeShape() const
Definition: TritonData.h:78
void updateMem(size_t size)
Definition: TritonData.cc:121
bool variableDims() const
Definition: TritonData.h:75
tuple result
Definition: mps_fire.py:311
int64_t productDims_
Definition: TritonData.h:127
bool anyNeg(const ShapeView &vec) const
Definition: TritonData.h:101
int64_t byteSize_
Definition: TritonData.h:130
size_t sizeShape_
Definition: TritonData.h:131
const ShapeView & shape() const
Definition: TritonData.h:69
std::shared_ptr< Result > result_
Definition: TritonData.h:139
unsigned batchSize() const
Definition: TritonData.h:72
void reset()
Definition: TritonData.cc:235
IO * data()
Definition: TritonData.h:94
triton::client::InferResult Result
Definition: TritonData.h:47
TritonClient * client_
Definition: TritonData.h:118
void prepare()
Definition: TritonData.cc:198
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
Definition: TritonData.cc:19
std::unordered_map< std::string, TritonInputData > TritonInputMap
Definition: TritonData.h:145
void setShape(const ShapeType &newShape)
std::vector< std::vector< DT >> TritonInput
Definition: TritonData.h:33
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:48
void createObject(IO **ioptr)
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:206
triton::client::InferenceServerGrpcClient * client()
Definition: TritonData.cc:68
unsigned uid() const
Definition: TritonData.h:109
void resetSizes()
Definition: TritonData.cc:112
inference::DataType dtype_
Definition: TritonData.h:129
bool noBatch_
Definition: TritonData.h:122
std::vector< edm::Span< const DT * >> TritonOutput
Definition: TritonData.h:35
std::shared_ptr< TritonMemResource< IO > > memResource_
Definition: TritonData.h:138
std::string dname_
Definition: TritonData.h:128
ShapeType fullShape_
Definition: TritonData.h:124
std::string name_
Definition: TritonData.h:116
std::vector< int64_t > ShapeType
Definition: TritonData.h:49
unsigned batchSize_
Definition: TritonData.h:123
std::string shmName_
Definition: TritonData.h:120
const std::string & dname() const
Definition: TritonData.h:71
bool variableDims_
Definition: TritonData.h:126
void toServer(TritonInputContainer< DT > ptr)
Definition: TritonData.cc:164
std::unordered_map< std::string, TritonOutputData > TritonOutputMap
Definition: TritonData.h:147
int64_t sizeDims() const
Definition: TritonData.h:76
int64_t byteSize() const
Definition: TritonData.h:70
tuple size
Write out results.