CMS 3D CMS Logo

TensorFlow.cc
Go to the documentation of this file.
1 /*
2  * TensorFlow interface helpers.
3  * For more info, see https://gitlab.cern.ch/mrieger/CMSSW-DNN.
4  *
5  * Author: Marcel Rieger
6  */
7 
12 
13 namespace tensorflow {
14 
15  void Options::setThreading(int nThreads) {
16  _nThreads = nThreads;
17  // set number of threads used for intra and inter operation communication
18  _options.config.set_intra_op_parallelism_threads(nThreads);
19  _options.config.set_inter_op_parallelism_threads(nThreads);
20  }
21 
23  /*
24  * The TensorFlow backend configures the available devices using options provided in the sessionOptions proto.
25  * // Options from https://github.com/tensorflow/tensorflow/blob/c53dab9fbc9de4ea8b1df59041a5ffd3987328c3/tensorflow/core/protobuf/config.proto
26  *
27  * If the device_count["GPU"] = 0 GPUs are not used.
28  * The visible_device_list configuration is used to map the `visible` devices (from CUDA_VISIBLE_DEVICES) to `virtual` devices.
29  * If Backend::cpu is requested, the GPU device is disallowed by device_count configuration.
30  * If Backend::cuda is requested:
31  * - if ResourceInformation shows an available Nvidia GPU device:
32  * the device is used with memory_growth configuration (not allocating all cuda memory at once).
33  * - if no device is present: an exception is raised.
34  */
35 
37  if (backend == Backend::cpu) {
38  // disable GPU usage
39  (*_options.config.mutable_device_count())["GPU"] = 0;
40  _options.config.mutable_gpu_options()->set_visible_device_list("");
41  }
42  // NVidia GPU
43  else if (backend == Backend::cuda) {
44  if (not ri->nvidiaDriverVersion().empty()) {
45  // Take only the first GPU in the CUDA_VISIBLE_DEVICE list
46  (*_options.config.mutable_device_count())["GPU"] = 1;
47  _options.config.mutable_gpu_options()->set_visible_device_list("0");
48  // Do not allocate all the memory on the GPU at the beginning.
49  _options.config.mutable_gpu_options()->set_allow_growth(true);
50  } else {
52  ex << "Cuda backend requested, but no NVIDIA GPU available in the job";
53  ex.addContext("Calling tensorflow::setBackend()");
54  throw ex;
55  }
56  }
57  // ROCm and Intel GPU are still not supported
58  else if ((backend == Backend::rocm) || (backend == Backend::intel)) {
60  ex << "ROCm/Intel GPU backend requested, but TF is not compiled yet for this platform";
61  ex.addContext("Calling tensorflow::setBackend()");
62  throw ex;
63  }
64  // Get NVidia GPU if possible or fallback to CPU
65  else if (backend == Backend::best) {
66  // Check if a Nvidia GPU is available
67  if (not ri->nvidiaDriverVersion().empty()) {
68  // Take only the first GPU in the CUDA_VISIBLE_DEVICE list
69  (*_options.config.mutable_device_count())["GPU"] = 1;
70  _options.config.mutable_gpu_options()->set_visible_device_list("0");
71  // Do not allocate all the memory on the GPU at the beginning.
72  _options.config.mutable_gpu_options()->set_allow_growth(true);
73  } else {
74  // Just CPU support
75  (*_options.config.mutable_device_count())["GPU"] = 0;
76  _options.config.mutable_gpu_options()->set_visible_device_list("");
77  }
78  }
79  }
80 
// Export the requested log level through the TF_CPP_MIN_LOG_LEVEL environment
// variable.
//
// Meaning of the level values:
//   0 = all messages are logged (default behavior)
//   1 = INFO messages are not printed
//   2 = INFO and WARNING messages are not printed
//   3 = INFO, WARNING, and ERROR messages are not printed
//
// level: value written to the variable, passed as a string.
void setLogging(const std::string& level) {
  const char* const variableName = "TF_CPP_MIN_LOG_LEVEL";

  // an overwrite flag of 0 keeps a value that is already present in the
  // environment, so an existing setting takes precedence over `level`
  const int overwriteExisting = 0;

  setenv(variableName, level.c_str(), overwriteExisting);
}
90 
91  MetaGraphDef* loadMetaGraphDef(const std::string& exportDir, const std::string& tag) {
92  Options default_options{};
93  return loadMetaGraphDef(exportDir, tag, default_options);
94  }
95 
96  MetaGraphDef* loadMetaGraphDef(const std::string& exportDir, const std::string& tag, Options& options) {
97  // objects to load the graph
98  Status status;
99  RunOptions runOptions;
100  SavedModelBundle bundle;
101 
102  // load the model
103  status = LoadSavedModel(options.getSessionOptions(), runOptions, exportDir, {tag}, &bundle);
104  if (!status.ok()) {
105  throw cms::Exception("InvalidMetaGraphDef")
106  << "error while loading metaGraphDef from '" << exportDir << "': " << status.ToString();
107  }
108 
109  // return a copy of the graph
110  return new MetaGraphDef(bundle.meta_graph_def);
111  }
112 
113  MetaGraphDef* loadMetaGraph(const std::string& exportDir, const std::string& tag, Options& options) {
114  edm::LogInfo("PhysicsTools/TensorFlow")
115  << "tensorflow::loadMetaGraph() is deprecated, use tensorflow::loadMetaGraphDef() instead";
116 
117  return loadMetaGraphDef(exportDir, tag, options);
118  }
119 
120  GraphDef* loadGraphDef(const std::string& pbFile) {
121  // objects to load the graph
122  Status status;
123 
124  // load it
125  GraphDef* graphDef = new GraphDef();
126  status = ReadBinaryProto(Env::Default(), pbFile, graphDef);
127 
128  // check for success
129  if (!status.ok()) {
130  throw cms::Exception("InvalidGraphDef")
131  << "error while loading graphDef from '" << pbFile << "': " << status.ToString();
132  }
133 
134  return graphDef;
135  }
136 
138  Options default_options{};
139  return createSession(default_options);
140  }
141 
143  // objects to create the session
144  Status status;
145 
146  // create a new, empty session
147  Session* session = nullptr;
148  status = NewSession(options.getSessionOptions(), &session);
149  if (!status.ok()) {
150  throw cms::Exception("InvalidSession") << "error while creating session: " << status.ToString();
151  }
152 
153  return session;
154  }
155 
156  Session* createSession(const MetaGraphDef* metaGraphDef, const std::string& exportDir, Options& options) {
157  // check for valid pointer
158  if (metaGraphDef == nullptr) {
159  throw cms::Exception("InvalidMetaGraphDef") << "error while creating session: metaGraphDef is nullptr";
160  }
161 
162  // check that the graph has nodes
163  if (metaGraphDef->graph_def().node_size() <= 0) {
164  throw cms::Exception("InvalidMetaGraphDef") << "error while creating session: graphDef has no nodes";
165  }
166 
167  Session* session = createSession(options);
168 
169  // add the graph def from the meta graph
170  Status status;
171  status = session->Create(metaGraphDef->graph_def());
172  if (!status.ok()) {
173  throw cms::Exception("InvalidMetaGraphDef")
174  << "error while attaching metaGraphDef to session: " << status.ToString();
175  }
176 
177  // restore variables using the variable and index files in the export directory
178  // first, find names and paths
179  std::string varFileTensorName = metaGraphDef->saver_def().filename_tensor_name();
180  std::string restoreOpName = metaGraphDef->saver_def().restore_op_name();
181  std::string varDir = io::JoinPath(exportDir, kSavedModelVariablesDirectory);
182  std::string indexFile = io::JoinPath(varDir, MetaFilename(kSavedModelVariablesFilename));
183  std::string varFile = io::JoinPath(varDir, kSavedModelVariablesFilename);
184 
185  // when the index file is missing, there's nothing to do
186  if (!Env::Default()->FileExists(indexFile).ok()) {
187  return session;
188  }
189 
190  // create a tensor to store the variable file
191  Tensor varFileTensor(DT_STRING, TensorShape({}));
192  varFileTensor.scalar<tensorflow::tstring>()() = varFile;
193 
194  // run the restore op
195  status = session->Run({{varFileTensorName, varFileTensor}}, {}, {restoreOpName}, nullptr);
196  if (!status.ok()) {
197  throw cms::Exception("InvalidSession") << "error while restoring variables in session: " << status.ToString();
198  }
199 
200  return session;
201  }
202 
203  Session* createSession(const GraphDef* graphDef) {
204  Options default_options{};
205  return createSession(graphDef, default_options);
206  }
207 
208  Session* createSession(const GraphDef* graphDef, Options& options) {
209  // check for valid pointer
210  if (graphDef == nullptr) {
211  throw cms::Exception("InvalidGraphDef") << "error while creating session: graphDef is nullptr";
212  }
213 
214  // check that the graph has nodes
215  if (graphDef->node_size() <= 0) {
216  throw cms::Exception("InvalidGraphDef") << "error while creating session: graphDef has no nodes";
217  }
218 
219  // create a new, empty session
220  Session* session = createSession(options);
221 
222  // add the graph def
223  Status status;
224  status = session->Create(*graphDef);
225 
226  // check for success
227  if (!status.ok()) {
228  throw cms::Exception("InvalidSession") << "error while attaching graphDef to session: " << status.ToString();
229  }
230 
231  return session;
232  }
233 
234  bool closeSession(Session*& session) {
235  if (session == nullptr) {
236  return true;
237  }
238 
239  // close and delete the session
240  Status status = session->Close();
241  delete session;
242 
243  // reset the pointer
244  session = nullptr;
245 
246  return status.ok();
247  }
248 
249  bool closeSession(const Session*& session) {
250  auto s = const_cast<Session*>(session);
251  bool state = closeSession(s);
252 
253  // reset the pointer
254  session = nullptr;
255 
256  return state;
257  }
258 
259  void run(Session* session,
260  const NamedTensorList& inputs,
261  const std::vector<std::string>& outputNames,
262  std::vector<Tensor>* outputs,
263  const thread::ThreadPoolOptions& threadPoolOptions) {
264  if (session == nullptr) {
265  throw cms::Exception("InvalidSession") << "cannot run empty session";
266  }
267 
268  // create empty run options
269  RunOptions runOptions;
270 
271  // run and check the status
272  Status status = session->Run(runOptions, inputs, outputNames, {}, outputs, nullptr, threadPoolOptions);
273  if (!status.ok()) {
274  throw cms::Exception("InvalidRun") << "error while running session: " << status.ToString();
275  }
276  }
277 
278  void run(Session* session,
279  const NamedTensorList& inputs,
280  const std::vector<std::string>& outputNames,
281  std::vector<Tensor>* outputs,
282  thread::ThreadPoolInterface* threadPool) {
283  // create thread pool options
284  thread::ThreadPoolOptions threadPoolOptions;
285  threadPoolOptions.inter_op_threadpool = threadPool;
286  threadPoolOptions.intra_op_threadpool = threadPool;
287 
288  // run
289  run(session, inputs, outputNames, outputs, threadPoolOptions);
290  }
291 
292  void run(Session* session,
293  const NamedTensorList& inputs,
294  const std::vector<std::string>& outputNames,
295  std::vector<Tensor>* outputs,
296  const std::string& threadPoolName) {
297  // lookup the thread pool and forward the call accordingly
298  if (threadPoolName == "no_threads") {
300  } else if (threadPoolName == "tbb") {
301  // the TBBThreadPool singleton should already have been initialized with a number of threads
303  } else if (threadPoolName == "tensorflow") {
304  run(session, inputs, outputNames, outputs, nullptr);
305  } else {
306  throw cms::Exception("UnknownThreadPool")
307  << "thread pool implementation'" << threadPoolName << "' unknown, use 'no_threads', 'tbb', or 'tensorflow'";
308  }
309  }
310 
311  void run(Session* session,
312  const std::vector<std::string>& outputNames,
313  std::vector<Tensor>* outputs,
314  const std::string& threadPoolName) {
315  run(session, {}, outputNames, outputs, threadPoolName);
316  }
317 
319  // delete the session if set
320  Session* s = session.load();
321  if (s != nullptr) {
323  session.store(nullptr);
324  }
325 
326  // delete the graph if set
327  if (graph.load() != nullptr) {
328  delete graph.load();
329  graph.store(nullptr);
330  }
331  }
332 
333 } // namespace tensorflow
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:31
void setBackend(Backend backend=Backend::cpu)
Definition: TensorFlow.cc:22
virtual std::string const & nvidiaDriverVersion() const =0
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:120
MetaGraphDef * loadMetaGraphDef(const std::string &exportDir, const std::string &tag=kSavedModelTagServe)
Definition: TensorFlow.cc:91
std::atomic< Session * > session
Definition: TensorFlow.h:192
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:259
bool closeSession(Session *&session)
Definition: TensorFlow.cc:234
static TBBThreadPool & instance(int nThreads=-1)
Definition: TBBThreadPool.h:24
MetaGraphDef * loadMetaGraph(const std::string &exportDir, const std::string &tag, Options &Options)
Definition: TensorFlow.cc:113
Session * createSession()
Definition: TensorFlow.cc:137
Log< level::Info, false > LogInfo
void setLogging(const std::string &level="3")
Definition: TensorFlow.cc:81
std::atomic< GraphDef * > graph
Definition: TensorFlow.h:191
constexpr bool Default
Definition: SoACommon.h:73
void addContext(std::string const &context)
Definition: Exception.cc:169
void setThreading(int nThreads=1)
Definition: TensorFlow.cc:15
static NoThreadPool & instance()
Definition: NoThreadPool.h:22
SessionOptions _options
Definition: TensorFlow.h:36