
NTSession.cc
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7  http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 // NOTE: The memory layout of the Node class changes depending on whether NDEBUG
16 // was set when TensorFlow was compiled: Node holds two EdgeSet instances, and
17 // EdgeSet gains an extra data member when NDEBUG is not set.
18 
19 /*
20 This file is an adaptation of the original direct_session.cc file located at
21 https://github.com/tensorflow/tensorflow/blob/v1.3.0/tensorflow/core/common_runtime/direct_session.cc
22 to meet the demands of the software environment developed and used by the CMS collaboration.
23 
24 Changes with respect to the original code are documented in the NTSession.h header file.
25 */
26 
27 #if !defined(NDEBUG)
28 #define NDEBUG 1
29 #endif
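
For illustration, a minimal sketch of the pattern behind the note above, loosely modeled on tensorflow/core/graph/edgeset.h (the member names here are assumptions, not the verbatim TensorFlow code). A debug build adds a mutation counter to EdgeSet, so sizeof(Node) differs between builds; forcing NDEBUG on keeps this translation unit consistent with a release-built TensorFlow library:

    class EdgeSetSketch {
      const void* ptrs_[2];        // inline storage, present in all builds
    #ifndef NDEBUG
      unsigned int mutations_ = 0; // debug-only member: changes the object size
    #endif
    };
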
30 
31 #include "NTSession.h"
32 
33 #include <atomic>
34 #include <string>
35 #include <vector>
36 
37 #include "FWCore/Utilities/interface/thread_safety_macros.h"
38 
39 #include "tensorflow/core/common_runtime/constant_folding.h"
40 #include "tensorflow/core/common_runtime/debugger_state_interface.h"
41 #include "tensorflow/core/common_runtime/device_factory.h"
42 #include "tensorflow/core/common_runtime/executor.h"
43 #include "tensorflow/core/common_runtime/function.h"
44 #include "tensorflow/core/common_runtime/graph_optimizer.h"
45 #include "tensorflow/core/common_runtime/memory_types.h"
46 #include "tensorflow/core/common_runtime/optimization_registry.h"
47 #include "tensorflow/core/common_runtime/simple_placer.h"
48 #include "tensorflow/core/common_runtime/step_stats_collector.h"
49 #include "tensorflow/core/framework/function.h"
50 #include "tensorflow/core/framework/graph.pb_text.h"
51 #include "tensorflow/core/framework/graph.pb.h"
52 #include "tensorflow/core/framework/graph_def_util.h"
53 #include "tensorflow/core/framework/log_memory.h"
54 #include "tensorflow/core/framework/node_def.pb.h"
55 #include "tensorflow/core/framework/tensor.h"
56 #include "tensorflow/core/framework/versions.pb.h"
57 #include "tensorflow/core/graph/algorithm.h"
58 #include "tensorflow/core/graph/graph.h"
59 #include "tensorflow/core/graph/graph_constructor.h"
60 #include "tensorflow/core/graph/graph_partition.h"
61 #include "tensorflow/core/graph/subgraph.h"
62 #include "tensorflow/core/graph/tensor_id.h"
63 #include "tensorflow/core/lib/core/errors.h"
64 #include "tensorflow/core/lib/core/notification.h"
65 #include "tensorflow/core/lib/core/refcount.h"
66 #include "tensorflow/core/lib/core/status.h"
67 #include "tensorflow/core/lib/gtl/array_slice.h"
68 #include "tensorflow/core/lib/gtl/stl_util.h"
69 #include "tensorflow/core/lib/monitoring/counter.h"
70 #include "tensorflow/core/lib/strings/numbers.h"
71 #include "tensorflow/core/lib/strings/str_util.h"
72 #include "tensorflow/core/lib/strings/strcat.h"
73 #include "tensorflow/core/platform/cpu_info.h"
74 #include "tensorflow/core/platform/logging.h"
75 #include "tensorflow/core/platform/mutex.h"
76 #include "tensorflow/core/platform/types.h"
77 #include "tensorflow/core/util/device_name_utils.h"
78 #include "tensorflow/core/util/env_var.h"
79 
80 #if GOOGLE_CUDA
81 #include "tensorflow/core/common_runtime/gpu/gpu_tracer.h"
82 #endif // GOOGLE_CUDA
83 
84 namespace tensorflow {
85 
86 namespace {
87 
88 CMS_THREAD_SAFE auto* nothreads_session_runs = monitoring::Counter<0>::New(
89  "/tensorflow/core/nothreads_session_runs",
90  "The number of times NTSession::Run() has been called.");
91 
92 
93 // TODO(vrv): Figure out how to unify the many different functions
94 // that generate RendezvousKey, since many of them have to be
95 // consistent with each other.
96 string GetRendezvousKey(const string& tensor_name,
97  const DeviceAttributes& device_info,
98  const FrameAndIter& frame_iter) {
99  return strings::StrCat(device_info.name(), ";",
100  strings::FpToString(device_info.incarnation()), ";",
101  device_info.name(), ";", tensor_name, ";",
102  frame_iter.frame_id, ":", frame_iter.iter_id);
103 }
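
As a worked example (the device name and incarnation value are hypothetical), the key built above for a tensor "logits:0" on the client CPU device would look like:

    string key = GetRendezvousKey("logits:0", device_attrs, FrameAndIter(0, 0));
    // key == "/job:localhost/replica:0/task:0/cpu:0;0000000000000001;"
    //        "/job:localhost/replica:0/task:0/cpu:0;logits:0;0:0"
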
104 
105 } // namespace
106 
107 class NTSessionFactory : public SessionFactory {
108  public:
109  NTSessionFactory() {}
110 
111  bool AcceptsOptions(const SessionOptions& options) override {
112  return options.target == "no_threads";
113  }
114 
115  Session* NewSession(const SessionOptions& options) override {
116  // Must do this before the CPU allocator is created.
117  if (options.config.graph_options().build_cost_model() > 0) {
118  EnableCPUAllocatorFullStats(true);
119  }
120  std::vector<Device*> devices;
121  Status s = DeviceFactory::AddDevices(
122  options, "/job:localhost/replica:0/task:0", &devices);
123  if (!s.ok()) {
124  LOG(ERROR) << s;
125  return nullptr;
126  }
127 
128  NTSession* session =
129  new NTSession(options, new DeviceMgr(devices), this);
130  {
131  mutex_lock l(sessions_lock_);
132  sessions_.push_back(session);
133  }
134  return session;
135  }
136 
137  Status Reset(const SessionOptions& options,
138  const std::vector<string>& containers) override {
139  std::vector<NTSession*> sessions_to_reset;
140  {
141  mutex_lock l(sessions_lock_);
142  // We create a copy to ensure that we don't have a deadlock when
143  // session->Close calls the NTSessionFactory.Deregister, which
144  // acquires sessions_lock_.
145  std::swap(sessions_to_reset, sessions_);
146  }
147  Status s;
148  for (auto session : sessions_to_reset) {
149  s.Update(session->Reset(containers));
150  }
151  // TODO(suharshs): Change the Reset behavior of all SessionFactories so that
152  // it doesn't close the sessions?
153  for (auto session : sessions_to_reset) {
154  s.Update(session->Close());
155  }
156  return s;
157  }
158 
159  void Deregister(const NTSession* session) {
160  mutex_lock l(sessions_lock_);
161  sessions_.erase(std::remove(sessions_.begin(), sessions_.end(), session),
162  sessions_.end());
163  }
164 
165  private:
166  mutex sessions_lock_;
167  std::vector<NTSession*> sessions_ GUARDED_BY(sessions_lock_);
168 };
169 
170 class NTSessionRegistrar {
171  public:
172  NTSessionRegistrar() {
173  SessionFactory::Register("NOTHREADS_SESSION", new NTSessionFactory());
174  }
175 };
176 static NTSessionRegistrar registrar;
177 
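
A minimal sketch of how client code reaches this factory through the standard tensorflow::NewSession() entry point (the options shown are an assumption; only the target string is prescribed by the registration above):

    tensorflow::SessionOptions opts;
    opts.target = "no_threads";  // matched by NTSessionFactory::AcceptsOptions()
    tensorflow::Session* session = tensorflow::NewSession(opts);
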
178 std::atomic_int_fast64_t NTSession::step_id_counter_(1);
179 
180 // NOTE: On Android with a single device, there is never
181 // a risk of an OpKernel blocking indefinitely:
182 //
183 // 1) No operations do I/O that depends on other simultaneous kernels,
184 //
185 // 2) Recv nodes always complete immediately: The inputs are sent into
186 // the local rendezvous before we start the executor, so the
187 // corresponding recvs will not block.
188 //
189 // Based on these assumptions, we can use the same thread pool for
190 // both "non-blocking" and "blocking" OpKernels on Android.
191 //
192 // This may change down the road when we add support for multiple
193 // devices that run concurrently, in which case we will need to
194 // revisit this decision.
195 void NTSession::SchedClosure(std::function<void()> c) {
196 
197  // On Android, there is no implementation of ThreadPool that takes
198  // std::function, only Closure, which we cannot easily convert.
199  //
200  // Instead, we just run the function in-line, which is currently
201  // safe given the reasoning above.
202 
203  // Overridden for CMSSW: the framework does the scheduling, so run the closure inline.
204  c();
205  //pool->Schedule(std::move(c));
206 }
207 
208 NTSession::NTSession(const SessionOptions& options,
209  const DeviceMgr* device_mgr,
210  NTSessionFactory* const factory)
211  : options_(options),
212  device_mgr_(device_mgr),
213  factory_(factory),
214  cancellation_manager_(new CancellationManager()),
215  operation_timeout_in_ms_(options_.config.operation_timeout_in_ms()) {
216  // The default value of sync_on_finish will be flipped soon and this
217  // environment variable will be removed as well.
218  Status status =
219  ReadBoolFromEnvVar("TF_SYNC_ON_FINISH", true, &sync_on_finish_);
220  if (!status.ok()) {
221  LOG(ERROR) << status.error_message();
222  }
223  // NOTE(mrry): We do not need to use a unique string for the session
224  // handle, because NTSession owns its devices. This may change
225  // in future versions.
226  session_handle_ = "no_threads";
227  int devices_added = 0;
228  if (options.config.log_device_placement()) {
229  const string mapping_str = device_mgr_->DeviceMappingString();
230  if (mapping_str.empty()) {
231  printf("Device mapping: no known devices.\n");
232  } else {
233  printf("Device mapping:\n%s", mapping_str.c_str());
234  }
235  LOG(INFO) << "Device mapping:\n" << mapping_str;
236  }
237  for (auto d : device_mgr_->ListDevices()) {
238  devices_.push_back(d);
239  device_set_.AddDevice(d);
240  d->op_segment()->AddHold(session_handle_);
241 
242  // The first device added is special: it is the 'client device' (a
243  // CPU device) from which we feed and fetch Tensors.
244  if (devices_added == 0) {
245  device_set_.set_client_device(d);
246  }
247  ++devices_added;
248  }
249 }
250 
251 NTSession::~NTSession() {
252  if (!closed_) Close().IgnoreError();
253  for (auto& it : partial_runs_) {
254  it.second.reset(nullptr);
255  }
256  for (auto& it : executors_) {
257  it.second.reset();
258  }
259  for (auto d : device_mgr_->ListDevices()) {
260  d->op_segment()->RemoveHold(session_handle_);
261  }
262  delete cancellation_manager_;
263 
264  execution_state_.reset(nullptr);
265  flib_def_.reset(nullptr);
266 }
267 
268 Status NTSession::MaybeInitializeExecutionState(
269  const GraphDef& graph, bool* out_already_initialized) {
270  // If already initialized, do nothing.
271  if (flib_def_ && execution_state_) {
272  *out_already_initialized = true;
273  return Status::OK();
274  }
275  // Set up the per-session execution state.
276  // NOTE(mrry): The function library created here will be used for
277  // all subsequent extensions of the graph.
278  flib_def_.reset(
279  new FunctionLibraryDefinition(OpRegistry::Global(), graph.library()));
280  SimpleGraphExecutionStateOptions options;
281  options.device_set = &device_set_;
282  options.session_options = &options_;
283  // TODO(mrry,suharshs): We explicitly copy `graph` so that
284  // `MakeForBaseGraph()` can take ownership of its
285  // contents. Previously this happened implicitly in calls to the
286  // `SimpleGraphExecutionState`. Other sessions call
287  // `MakeForBaseGraph` in such a way that we can destructively read
288  // the passed-in `GraphDef`. In principle we could do the same here,
289  // with a wider refactoring; we might revise the direct session so
290  // that it copies the graph fewer times.
291  GraphDef temp(graph);
292  TF_RETURN_IF_ERROR(SimpleGraphExecutionState::MakeForBaseGraph(
293  &temp, options, &execution_state_));
294  graph_created_ = true;
295  *out_already_initialized = false;
296  return Status::OK();
297 }
298 
299 Status NTSession::Create(const GraphDef& graph) {
300  TF_RETURN_IF_ERROR(init_error_);
301  if (graph.node_size() > 0) {
302  mutex_lock l(graph_def_lock_);
303  if (graph_created_) {
304  return errors::AlreadyExists(
305  "A Graph has already been created for this session.");
306  }
307  return ExtendLocked(graph);
308  }
309  return Status::OK();
310 }
311 
312 Status NTSession::Extend(const GraphDef& graph) {
313  TF_RETURN_IF_ERROR(CheckNotClosed());
314  mutex_lock l(graph_def_lock_);
315  return ExtendLocked(graph);
316 }
317 
318 Status NTSession::ExtendLocked(const GraphDef& graph) {
319  bool already_initialized;
320  // If this is the first call, we can initialize the execution state
321  // with `graph` and do not need to call `Extend()`.
322  TF_RETURN_IF_ERROR(
323  MaybeInitializeExecutionState(graph, &already_initialized));
324  if (already_initialized) {
325  TF_RETURN_IF_ERROR(flib_def_->AddLibrary(graph.library()));
326  std::unique_ptr<SimpleGraphExecutionState> state;
327  TF_RETURN_IF_ERROR(execution_state_->Extend(graph, &state));
328  execution_state_.swap(state);
329  }
330  return Status::OK();
331 }
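
A short usage sketch of the Create()/Extend() contract implemented above (the GraphDef contents and the session pointer are assumptions): the first non-empty graph initializes the execution state, and later graphs are merged in by ExtendLocked():

    tensorflow::GraphDef base_graph, extra_nodes;
    // ... populate the two GraphDefs ...
    TF_CHECK_OK(session->Create(base_graph));   // first call: initializes state
    TF_CHECK_OK(session->Extend(extra_nodes));  // later calls: merged via ExtendLocked()
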
332 
333 Status NTSession::Run(const NamedTensorList& inputs,
334  const std::vector<string>& output_names,
335  const std::vector<string>& target_nodes,
336  std::vector<Tensor>* outputs) {
337  RunMetadata run_metadata;
338  return Run(RunOptions(), inputs, output_names, target_nodes, outputs,
339  &run_metadata);
340 }
341 
342 Status NTSession::CreateDebuggerState(
343  const DebugOptions& debug_options, int64 session_run_index,
344  int64 executor_step_index, const std::vector<string>& input_names,
345  const std::vector<string>& output_names,
346  const std::vector<string>& target_names,
347  std::unique_ptr<DebuggerStateInterface>* debugger_state) {
348  TF_RETURN_IF_ERROR(
349  DebuggerStateRegistry::CreateState(debug_options, debugger_state));
350  TF_RETURN_IF_ERROR(debugger_state->get()->PublishDebugMetadata(
351  debug_options.global_step(), session_run_index, executor_step_index,
352  input_names, output_names, target_names));
353  return Status::OK();
354 }
355 
356 Status NTSession::DecorateAndPublishGraphForDebug(
357  const DebugOptions& debug_options, Graph* graph, Device* device) {
358  std::unique_ptr<DebugGraphDecoratorInterface> decorator;
359  TF_RETURN_IF_ERROR(
360  DebugGraphDecoratorRegistry::CreateDecorator(debug_options, &decorator));
361 
362  TF_RETURN_IF_ERROR(decorator->DecorateGraph(graph, device));
363  TF_RETURN_IF_ERROR(decorator->PublishGraph(*graph, device->name()));
364  return Status::OK();
365 }
366 
367 Status NTSession::Run(const RunOptions& run_options,
368  const NamedTensorList& inputs,
369  const std::vector<string>& output_names,
370  const std::vector<string>& target_nodes,
371  std::vector<Tensor>* outputs,
372  RunMetadata* run_metadata) {
373  TF_RETURN_IF_ERROR(CheckNotClosed());
374  nothreads_session_runs->GetCell()->IncrementBy(1);
375  {
376  mutex_lock l(graph_def_lock_);
377  if (!graph_created_) {
378  return errors::InvalidArgument(
379  "Session was not created with a graph before Run()!");
380  }
381  }
382 
383  // Extract the input names for this run of the session.
384  std::vector<string> input_tensor_names;
385  input_tensor_names.reserve(inputs.size());
386  for (const auto& it : inputs) {
387  input_tensor_names.push_back(it.first);
388  }
389 
390 
391  // Check if we already have an executor for these arguments.
392  ExecutorsAndKeys* executors_and_keys;
393  RunStateArgs run_state_args(run_options.debug_options());
394 
395  Executor::Args args;
396  args.step_id = step_id_counter_.fetch_add(1);
397 
398  TF_RETURN_IF_ERROR(
399  GetOrCreateExecutors(input_tensor_names, output_names, target_nodes,
400  &executors_and_keys, &run_state_args));
401  const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
402 
403  std::unique_ptr<DebuggerStateInterface> debugger_state;
404  if (!run_options.debug_options().debug_tensor_watch_opts().empty()) {
405  TF_RETURN_IF_ERROR(CreateDebuggerState(
406  run_options.debug_options(), args.step_id, executor_step_count,
407  input_tensor_names, output_names, target_nodes, &debugger_state));
408  }
409 
410  // Configure a call frame for the step, which we use to feed and
411  // fetch values to and from the executors.
412  FunctionCallFrame call_frame(executors_and_keys->input_types,
413  executors_and_keys->output_types);
414  gtl::InlinedVector<Tensor, 4> feed_args(inputs.size());
415  for (const auto& it : inputs) {
416  if (it.second.dtype() == DT_RESOURCE) {
417  Tensor tensor_from_handle;
418  TF_RETURN_IF_ERROR(
419  ResourceHandleToInputTensor(it.second, &tensor_from_handle));
420  feed_args[executors_and_keys->input_name_to_index[it.first]] =
421  tensor_from_handle;
422  } else {
423  feed_args[executors_and_keys->input_name_to_index[it.first]] = it.second;
424  }
425  }
426  Status s = call_frame.SetArgs(feed_args);
427  if (errors::IsInternal(s)) {
428  return errors::InvalidArgument(s.error_message());
429  } else if (!s.ok()) {
430  return s;
431  }
432 
433  // Create a run state and start execution.
434  RunState run_state(args.step_id, &devices_);
435  run_state.rendez = new IntraProcessRendezvous(device_mgr_.get());
436  CancellationManager step_cancellation_manager;
437  args.call_frame = &call_frame;
438 
439  // Start parallel Executors.
440  const size_t num_executors = executors_and_keys->items.size();
441  ExecutorBarrier* barrier = new ExecutorBarrier(
442  num_executors, run_state.rendez, [&run_state](const Status& ret) {
443  {
444  mutex_lock l(run_state.mu_);
445  run_state.status.Update(ret);
446  }
447  run_state.executors_done.Notify();
448  });
449 
450  args.rendezvous = run_state.rendez;
451  args.cancellation_manager = &step_cancellation_manager;
452  args.runner = [this](Executor::Args::Closure c) {
453  SchedClosure(std::move(c));
454  };
455  args.session_state = &session_state_;
456  args.tensor_store = &run_state.tensor_store;
457  args.step_container = &run_state.step_container;
458  if (LogMemory::IsEnabled()) {
459  LogMemory::RecordStep(args.step_id, run_state_args.handle);
460  }
461  args.sync_on_finish = sync_on_finish_;
462 
463  const bool do_trace = (run_options.trace_level() > RunOptions::NO_TRACE);
464 
465  bool update_cost_model = false;
466  if (options_.config.graph_options().build_cost_model() > 0) {
467  const int64 build_cost_model_every =
468  options_.config.graph_options().build_cost_model();
469  const int64 build_cost_model_after =
470  options_.config.graph_options().build_cost_model_after();
471  int64 measure_step_count = executor_step_count - build_cost_model_after;
472  if (measure_step_count >= 0) {
473  update_cost_model =
474  ((measure_step_count + 1) % build_cost_model_every == 0);
475  }
476  }
477  if (do_trace || update_cost_model) {
478  run_state.collector.reset(
479  new StepStatsCollector(run_metadata->mutable_step_stats()));
480  args.stats_collector = run_state.collector.get();
481  }
482 
483 #if GOOGLE_CUDA
484  std::unique_ptr<GPUTracer> tracer;
485  if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) {
486  tracer.reset(CreateGPUTracer());
487  // tracer will be NULL on non-GPU platforms.
488  // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
489  if (tracer) tracer->Start().IgnoreError();
490  }
491 #endif // GOOGLE_CUDA
492 
493  // Register this step with session's cancellation manager, so that
494  // `Session::Close()` will cancel the step.
495  CancellationToken cancellation_token =
496  cancellation_manager_->get_cancellation_token();
497  bool already_cancelled = !cancellation_manager_->RegisterCallback(
498  cancellation_token, [&step_cancellation_manager]() {
499  step_cancellation_manager.StartCancel();
500  });
501  if (already_cancelled) {
502  // NOTE(mrry): If we don't explicitly notify
503  // `run_state.executors_done`, the RunState destructor would
504  // block on this notification.
505  run_state.executors_done.Notify();
506  delete barrier;
507  return errors::Cancelled("Run call was cancelled");
508  }
509 
510  for (const auto& item : executors_and_keys->items) {
511  item.executor->RunAsync(args, barrier->Get());
512  }
513 
514  WaitForNotification(&run_state, &step_cancellation_manager,
515  run_options.timeout_in_ms() > 0
516  ? run_options.timeout_in_ms()
517  : operation_timeout_in_ms_);
518 
519  if (!cancellation_manager_->DeregisterCallback(cancellation_token)) {
520  // The step has been cancelled: make sure we don't attempt to receive the
521  // outputs as this would make it block forever.
522  mutex_lock l(run_state.mu_);
523  run_state.status.Update(errors::Cancelled("Run call was cancelled"));
524  }
525 
526 #if GOOGLE_CUDA
527  if (tracer) {
528  // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
529  tracer->Stop().IgnoreError();
530  tracer->Collect(args.stats_collector).IgnoreError();
531  }
532 #endif // GOOGLE_CUDA
533 
534  {
535  mutex_lock l(run_state.mu_);
536  TF_RETURN_IF_ERROR(run_state.status);
537  }
538 
539  // Receive outputs.
540  if (outputs) {
541  std::vector<Tensor> sorted_outputs;
542  Status s = call_frame.ConsumeRetvals(&sorted_outputs);
543  if (errors::IsInternal(s)) {
544  return errors::InvalidArgument(s.error_message());
545  } else if (!s.ok()) {
546  return s;
547  }
548  const bool unique_outputs =
549  output_names.size() == executors_and_keys->output_name_to_index.size();
550  // first_indices[i] = j implies that j is the smallest value for which
551  // output_names[i] == output_names[j].
552  std::vector<int> first_indices;
553  if (!unique_outputs) {
554  first_indices.resize(output_names.size());
555  for (int i = 0; i < static_cast<int>(output_names.size()); ++i) {
556  for (int j = 0; j <= i; ++j) {
557  if (output_names[i] == output_names[j]) {
558  first_indices[i] = j;
559  break;
560  }
561  }
562  }
563  }
564  outputs->clear();
565  outputs->reserve(sorted_outputs.size());
566  for (int i = 0; i < static_cast<int>(output_names.size()); ++i) {
567  const string& output_name = output_names[i];
568  if (first_indices.empty() || first_indices[i] == i) {
569  outputs->emplace_back(
570  std::move(sorted_outputs[executors_and_keys
571  ->output_name_to_index[output_name]]));
572  } else {
573  outputs->push_back((*outputs)[first_indices[i]]);
574  }
575  }
576  }
577 
578  // Save the output tensors of this run we choose to keep.
579  TF_RETURN_IF_ERROR(
580  run_state.tensor_store.SaveTensors(output_names, &session_state_));
581 
582  // Build and return the cost model as instructed.
583  mutex_lock l(executor_lock_);
584  if (update_cost_model) {
585  // Build the cost model
586  std::unordered_map<string, const Graph*> device_to_graph;
587  for (const PerPartitionExecutorsAndLib& partition :
588  executors_and_keys->items) {
589  const Graph* graph = partition.graph;
590  const string device = partition.flib->device()->name();
591  device_to_graph[device] = graph;
592  }
593  args.stats_collector->BuildCostModel(&cost_model_manager_, device_to_graph);
594 
595  // Annotate stats onto the cost graph.
596  CostGraphDef* cost_graph = run_metadata->mutable_cost_graph();
597  for (const auto& item : executors_and_keys->items) {
598  TF_RETURN_IF_ERROR(
599  cost_model_manager_.AddToCostGraphDef(item.graph, cost_graph));
600  }
601  }
602 
603  // If requested via RunOptions, output the partition graphs.
604  if (run_options.output_partition_graphs()) {
605  protobuf::RepeatedPtrField<GraphDef>* partition_graph_defs =
606  run_metadata->mutable_partition_graphs();
607  for (const PerPartitionExecutorsAndLib& exec_and_lib :
608  executors_and_keys->items) {
609  GraphDef* partition_graph_def = partition_graph_defs->Add();
610  exec_and_lib.graph->ToGraphDef(partition_graph_def);
611  }
612  }
613 
614  return Status::OK();
615 }
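
For reference, a minimal sketch of a Run() call against this implementation; the endpoint names "x:0" and "y:0", the input tensor, and the session pointer are hypothetical:

    std::vector<std::pair<std::string, tensorflow::Tensor>> feeds = {{"x:0", x}};
    std::vector<tensorflow::Tensor> fetched;
    tensorflow::Status s = session->Run(feeds, {"y:0"}, /*target_nodes=*/{}, &fetched);
    if (s.ok()) { /* fetched[0] holds the value of "y:0" */ }
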
616 
617 Status NTSession::PRunSetup(const std::vector<string>& input_names,
618  const std::vector<string>& output_names,
619  const std::vector<string>& target_nodes,
620  string* handle) {
621  TF_RETURN_IF_ERROR(CheckNotClosed());
622  {
623  mutex_lock l(graph_def_lock_);
624  if (!graph_created_) {
625  return errors::InvalidArgument(
626  "Session was not created with a graph before PRunSetup()!");
627  }
628  }
629 
630  // Check if we already have an executor for these arguments.
631  ExecutorsAndKeys* executors_and_keys;
632  // TODO(cais): TFDBG support for partial runs.
633  DebugOptions debug_options;
634  RunStateArgs run_state_args(debug_options);
635  run_state_args.is_partial_run = true;
636  TF_RETURN_IF_ERROR(GetOrCreateExecutors(input_names, output_names,
637  target_nodes, &executors_and_keys,
638  &run_state_args));
639 
640  // Create the run state and save it for future PRun calls.
641  Executor::Args args;
642  args.step_id = step_id_counter_.fetch_add(1);
643  RunState* run_state =
644  new RunState(input_names, output_names, args.step_id, &devices_);
645  run_state->rendez = new IntraProcessRendezvous(device_mgr_.get());
646  {
647  mutex_lock l(executor_lock_);
648  if (!partial_runs_
649  .emplace(run_state_args.handle,
650  std::unique_ptr<RunState>(run_state))
651  .second) {
652  return errors::Internal("The handle '", run_state_args.handle,
653  "' created for this partial run is not unique.");
654  }
655  }
656 
657  // Start parallel Executors.
658  const size_t num_executors = executors_and_keys->items.size();
659  ExecutorBarrier* barrier = new ExecutorBarrier(
660  num_executors, run_state->rendez, [run_state](const Status& ret) {
661  if (!ret.ok()) {
662  mutex_lock l(run_state->mu_);
663  run_state->status.Update(ret);
664  }
665  run_state->executors_done.Notify();
666  });
667 
668  args.rendezvous = run_state->rendez;
669  args.cancellation_manager = cancellation_manager_;
670  args.runner = [this](Executor::Args::Closure c) {
671  SchedClosure(std::move(c));
672  };
673  args.session_state = &session_state_;
674  args.tensor_store = &run_state->tensor_store;
675  args.step_container = &run_state->step_container;
676  if (LogMemory::IsEnabled()) {
677  LogMemory::RecordStep(args.step_id, run_state_args.handle);
678  }
679  args.sync_on_finish = sync_on_finish_;
680 
681  if (options_.config.graph_options().build_cost_model()) {
682  run_state->collector.reset(new StepStatsCollector(nullptr));
683  args.stats_collector = run_state->collector.get();
684  }
685 
686  for (auto& item : executors_and_keys->items) {
687  item.executor->RunAsync(args, barrier->Get());
688  }
689 
690  *handle = run_state_args.handle;
691  return Status::OK();
692 }
693 
694 Status NTSession::PRun(const string& handle, const NamedTensorList& inputs,
695  const std::vector<string>& output_names,
696  std::vector<Tensor>* outputs) {
697  TF_RETURN_IF_ERROR(CheckNotClosed());
698  std::vector<string> parts = str_util::Split(handle, ';');
699  const string& key = parts[0];
700  // Get the executors for this partial run.
701  ExecutorsAndKeys* executors_and_keys;
702  RunState* run_state;
703  {
704  mutex_lock l(executor_lock_); // could use reader lock
705  auto exc_it = executors_.find(key);
706  if (exc_it == executors_.end()) {
707  return errors::InvalidArgument(
708  "Must run 'setup' before performing partial runs!");
709  }
710  executors_and_keys = exc_it->second.get();
711 
712  auto prun_it = partial_runs_.find(handle);
713  if (prun_it == partial_runs_.end()) {
714  return errors::InvalidArgument(
715  "Must run 'setup' before performing partial runs!");
716  }
717  run_state = prun_it->second.get();
718 
719  // Make sure that this is a new set of feeds that are still pending.
720  for (const auto& input : inputs) {
721  auto it = run_state->pending_inputs.find(input.first);
722  if (it == run_state->pending_inputs.end()) {
723  return errors::InvalidArgument(
724  "The feed ", input.first,
725  " was not specified in partial_run_setup.");
726  } else if (it->second) {
727  return errors::InvalidArgument("The feed ", input.first,
728  " has already been fed.");
729  }
730  }
731  // Check that this is a new set of fetches that are still pending.
732  for (const auto& output : output_names) {
733  auto it = run_state->pending_outputs.find(output);
734  if (it == run_state->pending_outputs.end()) {
735  return errors::InvalidArgument(
736  "The fetch ", output, " was not specified in partial_run_setup.");
737  } else if (it->second) {
738  return errors::InvalidArgument("The fetch ", output,
739  " has already been fetched.");
740  }
741  }
742  }
743 
744  // Check that this new set of fetches can be computed from all the
745  // feeds we have supplied.
746  TF_RETURN_IF_ERROR(
747  CheckFetch(inputs, output_names, executors_and_keys, run_state));
748 
749  // Send inputs.
750  Status s = SendPRunInputs(inputs, executors_and_keys, run_state->rendez);
751 
752  // Receive outputs.
753  if (s.ok()) {
754  s = RecvPRunOutputs(output_names, executors_and_keys, run_state, outputs);
755  }
756 
757  // Save the output tensors of this run we choose to keep.
758  if (s.ok()) {
759  s = run_state->tensor_store.SaveTensors(output_names, &session_state_);
760  }
761 
762  {
763  mutex_lock l(executor_lock_);
764  // Delete the run state if there is an error or all fetches are done.
765  bool done = true;
766  if (s.ok()) {
767  {
768  mutex_lock l(run_state->mu_);
769  if (!run_state->status.ok()) {
770  LOG(WARNING) << "An error unrelated to this prun has been detected. "
771  << run_state->status;
772  }
773  }
774  for (const auto& input : inputs) {
775  auto it = run_state->pending_inputs.find(input.first);
776  it->second = true;
777  }
778  for (const auto& name : output_names) {
779  auto it = run_state->pending_outputs.find(name);
780  it->second = true;
781  }
782  done = run_state->PendingDone();
783  }
784  if (done) {
785  WaitForNotification(run_state, cancellation_manager_,
786  operation_timeout_in_ms_);
787  partial_runs_.erase(handle);
788  }
789  }
790 
791  return s;
792 }
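
A sketch of the partial-run protocol served by PRunSetup() and PRun() above (all endpoint names, tensors, and the session pointer are hypothetical). Every feed and fetch is declared up front, then supplied and retrieved incrementally against the returned handle:

    std::string handle;
    TF_CHECK_OK(session->PRunSetup({"a:0", "b:0"}, {"sum:0", "prod:0"}, {}, &handle));
    std::vector<tensorflow::Tensor> out;
    TF_CHECK_OK(session->PRun(handle, {{"a:0", ta}, {"b:0", tb}}, {"sum:0"}, &out));
    TF_CHECK_OK(session->PRun(handle, {}, {"prod:0"}, &out));  // no new feeds required
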
793 
794 Status NTSession::ResourceHandleToInputTensor(const Tensor& resource_tensor,
795  Tensor* retrieved_tensor) {
796  if (resource_tensor.dtype() != DT_RESOURCE) {
797  return errors::InvalidArgument(strings::StrCat(
798  "ResourceHandleToInputTensor() received non-DT_RESOURCE Tensor: ",
799  resource_tensor.dtype()));
800  }
801 
802  ResourceHandle resource_handle = resource_tensor.scalar<ResourceHandle>()();
803 
804  if (resource_handle.container() ==
805  SessionState::kTensorHandleResourceTypeName) {
806  return session_state_.GetTensor(resource_handle.name(), retrieved_tensor);
807  } else {
808  return errors::InvalidArgument(strings::StrCat(
809  "Invalid resource type hash code: ", resource_handle.hash_code(),
810  "(name: ", resource_handle.name(),
811  " type: ", resource_handle.maybe_type_name(), ")"));
812  }
813 }
814 
815 Status NTSession::SendPRunInputs(const std::vector<std::pair<string, Tensor>>& inputs,
816  const ExecutorsAndKeys* executors_and_keys,
817  IntraProcessRendezvous* rendez) {
818  Status s;
819  Rendezvous::ParsedKey parsed;
820  // Insert the input tensors into the local rendezvous by their
821  // rendezvous key.
822  for (const auto& input : inputs) {
823  auto it =
824  executors_and_keys->input_name_to_rendezvous_key.find(input.first);
825  if (it == executors_and_keys->input_name_to_rendezvous_key.end()) {
826  return errors::Internal("'", input.first, "' is not a pre-defined feed.");
827  }
828  const string& input_key = it->second;
829 
830  s = Rendezvous::ParseKey(input_key, &parsed);
831  if (!s.ok()) {
832  rendez->StartAbort(s);
833  return s;
834  }
835 
836  if (input.second.dtype() == DT_RESOURCE) {
837  Tensor tensor_from_handle;
838  s = ResourceHandleToInputTensor(input.second, &tensor_from_handle);
839  if (s.ok()) {
840  s = rendez->Send(parsed, Rendezvous::Args(), tensor_from_handle, false);
841  }
842  } else {
843  s = rendez->Send(parsed, Rendezvous::Args(), input.second, false);
844  }
845 
846  if (!s.ok()) {
847  rendez->StartAbort(s);
848  return s;
849  }
850  }
851  return Status::OK();
852 }
853 
854 Status NTSession::RecvPRunOutputs(
855  const std::vector<string>& output_names,
856  const ExecutorsAndKeys* executors_and_keys, RunState* run_state,
857  std::vector<Tensor>* outputs) {
858  Status s;
859  if (!output_names.empty()) {
860  outputs->resize(output_names.size());
861  }
862 
863  Rendezvous::ParsedKey parsed;
864  // Get the outputs from the rendezvous
865  for (size_t output_offset = 0; output_offset < output_names.size();
866  ++output_offset) {
867  const string& output_name = output_names[output_offset];
868  auto it =
869  executors_and_keys->output_name_to_rendezvous_key.find(output_name);
870  if (it == executors_and_keys->output_name_to_rendezvous_key.end()) {
871  return errors::Internal("'", output_name,
872  "' is not a pre-defined fetch.");
873  }
874  const string& output_key = it->second;
875  Tensor output_tensor;
876  bool is_dead;
877  IntraProcessRendezvous* rendez = run_state->rendez;
878 
879  s = Rendezvous::ParseKey(output_key, &parsed);
880  if (s.ok()) {
881  // Fetch data from the Rendezvous.
882  s = rendez->Recv(parsed, Rendezvous::Args(), &output_tensor, &is_dead,
883  operation_timeout_in_ms_);
884  if (is_dead && s.ok()) {
885  s = errors::InvalidArgument("The tensor returned for ", output_name,
886  " was not valid.");
887  }
888  }
889  if (!s.ok()) {
890  rendez->StartAbort(s);
891  outputs->clear();
892  return s;
893  }
894 
895  (*outputs)[output_offset] = output_tensor;
896  }
897  return Status::OK();
898 }
899 
900 Status NTSession::CheckFetch(const std::vector<std::pair<string, Tensor>>& feeds,
901  const std::vector<string>& fetches,
902  const ExecutorsAndKeys* executors_and_keys,
903  const RunState* run_state) {
904  const Graph* graph = executors_and_keys->graph.get();
905  const NameNodeMap* name_to_node = &executors_and_keys->name_to_node;
906 
907  // Build the set of pending feeds that we haven't seen.
908  std::unordered_set<TensorId, TensorId::Hasher> pending_feeds;
909  {
910  mutex_lock l(executor_lock_);
911  for (const auto& input : run_state->pending_inputs) {
912  // Skip if the feed has already been fed.
913  if (input.second) continue;
914  TensorId id(ParseTensorName(input.first));
915  auto it = name_to_node->find(id.first);
916  if (it == name_to_node->end()) {
917  return errors::NotFound("Feed ", input.first, ": not found");
918  }
919  pending_feeds.insert(id);
920  }
921  }
922  for (const auto& it : feeds) {
923  TensorId id(ParseTensorName(it.first));
924  pending_feeds.erase(id);
925  }
926 
927  // Initialize the stack with the fetch nodes.
928  std::vector<const Node*> stack;
929  for (const string& fetch : fetches) {
930  TensorId id(ParseTensorName(fetch));
931  auto it = name_to_node->find(id.first);
932  if (it == name_to_node->end()) {
933  return errors::NotFound("Fetch ", fetch, ": not found");
934  }
935  stack.push_back(it->second);
936  }
937 
938  // Any tensor needed for fetches can't be in pending_feeds.
939  std::vector<bool> visited(graph->num_node_ids(), false);
940  while (!stack.empty()) {
941  const Node* n = stack.back();
942  stack.pop_back();
943 
944  for (const Edge* in_edge : n->in_edges()) {
945  const Node* in_node = in_edge->src();
946  if (pending_feeds.count({in_node->name(), in_edge->src_output()}) > 0) {
947  return errors::InvalidArgument("Fetch ", in_node->name(), ":",
948  in_edge->src_output(),
949  " can't be computed from the feeds"
950  " that have been fed so far.");
951  }
952  if (!visited[in_node->id()]) {
953  visited[in_node->id()] = true;
954  stack.push_back(in_node);
955  }
956  }
957  }
958  return Status::OK();
959 }
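
A concrete reading of the check above, under a hypothetical graph:

    // Hypothetical graph: c = Add(a, b), declared in PRunSetup with feeds
    // {"a:0", "b:0"} and fetch {"c:0"}. A PRun that has fed only "a:0" and asks
    // for "c:0" fails here: "b:0" is still in pending_feeds, and the reverse
    // DFS from c reaches it.
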
960 
961 Status NTSession::GetOrCreateExecutors(
962  gtl::ArraySlice<string> inputs,
963  gtl::ArraySlice<string> outputs, gtl::ArraySlice<string> target_nodes,
964  ExecutorsAndKeys** executors_and_keys, RunStateArgs* run_state_args) {
965  int64 handle_name_counter_value = -1;
966  if (LogMemory::IsEnabled() || run_state_args->is_partial_run) {
967  handle_name_counter_value = handle_name_counter_.fetch_add(1);
968  }
969 
970  string debug_tensor_watches_summary;
971  if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) {
972  debug_tensor_watches_summary = SummarizeDebugTensorWatches(
973  run_state_args->debug_options.debug_tensor_watch_opts());
974  }
975 
976  // Fast lookup path, no sorting.
977  const string key = strings::StrCat(
978  str_util::Join(inputs, ","), "->", str_util::Join(outputs, ","), "/",
979  str_util::Join(target_nodes, ","), "/", run_state_args->is_partial_run,
980  "/", debug_tensor_watches_summary);
981  // Set the handle, if it's needed to log memory or for partial run.
982  if (handle_name_counter_value >= 0) {
983  run_state_args->handle =
984  strings::StrCat(key, ";", handle_name_counter_value);
985  }
986 
987  // See if we already have the executors for this run.
988  {
989  mutex_lock l(executor_lock_); // could use reader lock
990  auto it = executors_.find(key);
991  if (it != executors_.end()) {
992  *executors_and_keys = it->second.get();
993  return Status::OK();
994  }
995  }
996 
997  // Slow lookup path, the unsorted key missed the cache.
998  // Sort the inputs and outputs, and look up with the sorted key in case an
999  // earlier call used a different order of inputs and outputs.
1000  //
1001  // We could consider some other signature instead of sorting that
1002  // preserves the same property to avoid the sort in the future.
1003  std::vector<string> inputs_sorted(inputs.begin(), inputs.end());
1004  std::sort(inputs_sorted.begin(), inputs_sorted.end());
1005  std::vector<string> outputs_sorted(outputs.begin(), outputs.end());
1006  std::sort(outputs_sorted.begin(), outputs_sorted.end());
1007  std::vector<string> tn_sorted(target_nodes.begin(), target_nodes.end());
1008  std::sort(tn_sorted.begin(), tn_sorted.end());
1009 
1010  const string sorted_key = strings::StrCat(
1011  str_util::Join(inputs_sorted, ","), "->",
1012  str_util::Join(outputs_sorted, ","), "/", str_util::Join(tn_sorted, ","),
1013  "/", run_state_args->is_partial_run, "/", debug_tensor_watches_summary);
1014  // Set the handle, if it's needed to log memory or for partial run.
1015  if (handle_name_counter_value >= 0) {
1016  run_state_args->handle =
1017  strings::StrCat(sorted_key, ";", handle_name_counter_value);
1018  }
1019 
1020  // See if we already have the executors for this run.
1021  {
1022  mutex_lock l(executor_lock_);
1023  auto it = executors_.find(sorted_key);
1024  if (it != executors_.end()) {
1025  *executors_and_keys = it->second.get();
1026  // Insert this under the original key.
1027  executors_.emplace(key, it->second);
1028  return Status::OK();
1029  }
1030  }
1031 
1032  // Nothing found, so create the executors and store in the cache.
1033  BuildGraphOptions options;
1034  options.feed_endpoints = inputs_sorted;
1035  options.fetch_endpoints = outputs_sorted;
1036  options.target_nodes = tn_sorted;
1037  options.use_function_convention = !run_state_args->is_partial_run;
1038  if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) {
1039  options.debug_options = run_state_args->debug_options;
1040  }
1041 
1042  std::shared_ptr<ExecutorsAndKeys> ek(new ExecutorsAndKeys);
1043 
1044  // The executor_lock_ is intentionally released while executor is
1045  // being created.
1046  std::unordered_map<string, std::unique_ptr<Graph>> graphs;
1047  TF_RETURN_IF_ERROR(CreateGraphs(options, &graphs, &ek->flib_def,
1048  run_state_args, &ek->input_types,
1049  &ek->output_types));
1050 
1051  if (run_state_args->is_partial_run) {
1052  ek->graph = std::move(run_state_args->graph);
1053  std::unordered_set<StringPiece, StringPiece::Hasher> names;
1054  for (const string& input : inputs) {
1055  TensorId id(ParseTensorName(input));
1056  names.emplace(id.first);
1057  }
1058  for (const string& output : outputs) {
1059  TensorId id(ParseTensorName(output));
1060  names.emplace(id.first);
1061  }
1062  for (Node* n : ek->graph->nodes()) {
1063  if (names.count(n->name()) > 0) {
1064  ek->name_to_node.insert({n->name(), n});
1065  }
1066  }
1067  }
1068  ek->items.reserve(graphs.size());
1069  const auto& optimizer_opts =
1070  options_.config.graph_options().optimizer_options();
1071  GraphOptimizer optimizer(optimizer_opts);
1072  for (auto iter = graphs.begin(); iter != graphs.end(); ++iter) {
1073  const string& partition_name = iter->first;
1074  std::unique_ptr<Graph>& partition_graph = iter->second;
1075  const int graph_def_version = partition_graph->versions().producer();
1076 
1077  Device* device;
1078  TF_RETURN_IF_ERROR(device_mgr_->LookupDevice(partition_name, &device));
1079 
1080  ek->items.resize(ek->items.size() + 1);
1081  auto* item = &(ek->items.back());
1082  item->flib.reset(NewFunctionLibraryRuntime(
1083  device_mgr_.get(), options_.env, device, graph_def_version,
1084  ek->flib_def.get(), optimizer_opts));
1085 
1086  LocalExecutorParams params;
1087  params.device = device;
1088  params.function_library = item->flib.get();
1089  auto lib = item->flib.get();
1090  auto opseg = device->op_segment();
1091  params.create_kernel = [this, lib, opseg](const NodeDef& ndef,
1092  OpKernel** kernel) {
1093  // Caches the kernel only if the node is stateful.
1094  if (!lib->IsStateful(ndef.op())) {
1095  return lib->CreateKernel(ndef, kernel);
1096  }
1097  auto create_fn = [lib, &ndef](OpKernel** kernel) {
1098  return lib->CreateKernel(ndef, kernel);
1099  };
1100  // Kernels created for subgraph nodes need to be cached. On
1101  // cache miss, create_fn() is invoked to create a kernel based
1102  // on the function library here + global op registry.
1103  return opseg->FindOrCreate(session_handle_, ndef.name(), kernel,
1104  create_fn);
1105  };
1106  params.delete_kernel = [lib](OpKernel* kernel) {
1107  // If the node is stateful, opseg owns it. Otherwise, delete it.
1108  if (kernel && !lib->IsStateful(kernel->type_string())) {
1109  delete kernel;
1110  }
1111  };
1112  params.node_outputs_cb = node_outputs_callback_;
1113 
1114  optimizer.Optimize(lib, options_.env, device, &iter->second);
1115 
1116  // EXPERIMENTAL: tfdbg inserts debug nodes in the graph.
1117  if (!options.debug_options.debug_tensor_watch_opts().empty()) {
1118  TF_RETURN_IF_ERROR(DecorateAndPublishGraphForDebug(
1119  options.debug_options, partition_graph.get(), params.device));
1120  }
1121 
1122  TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device->device_type()),
1123  device->name(),
1124  partition_graph.get()));
1125  // NewLocalExecutor takes ownership of partition_graph.
1126  item->graph = partition_graph.get();
1127  item->executor = nullptr;
1128  Executor* executor;
1129  TF_RETURN_IF_ERROR(
1130  NewLocalExecutor(params, partition_graph.release(), &executor));
1131  item->executor.reset(executor);
1132  }
1133 
1134  // Cache the mapping from input/output names to graph elements to
1135  // avoid recomputing it every time.
1136  if (!run_state_args->is_partial_run) {
1137  // For regular `Run()`, we use the function calling convention, and so
1138  // maintain a mapping from input/output names to
1139  // argument/return-value ordinal index.
1140  for (size_t i = 0; i < inputs_sorted.size(); ++i) {
1141  const string& input = inputs_sorted[i];
1142  ek->input_name_to_index[input] = i;
1143  }
1144  for (size_t i = 0; i < outputs_sorted.size(); ++i) {
1145  const string& output = outputs_sorted[i];
1146  ek->output_name_to_index[output] = i;
1147  }
1148  } else {
1149  // For `PRun()`, we use the rendezvous calling convention, and so
1150  // maintain a mapping from input/output names to rendezvous keys.
1151  //
1152  // We always use the first device as the device name portion of the
1153  // key, even if we're feeding another graph.
1154  for (size_t i = 0; i < inputs_sorted.size(); ++i) {
1155  const string& input = inputs_sorted[i];
1156  ek->input_name_to_rendezvous_key[input] = GetRendezvousKey(
1157  input, device_set_.client_device()->attributes(), FrameAndIter(0, 0));
1158  }
1159  for (size_t i = 0; i < outputs_sorted.size(); ++i) {
1160  const string& output = outputs_sorted[i];
1161  ek->output_name_to_rendezvous_key[output] =
1162  GetRendezvousKey(output, device_set_.client_device()->attributes(),
1163  FrameAndIter(0, 0));
1164  }
1165  }
1166 
1167  // Reacquire the lock, try to insert into the map.
1168  mutex_lock l(executor_lock_);
1169 
1170  // Another thread may have created the entry before us, in which case we will
1171  // reuse the already created one.
1172  auto insert_result = executors_.emplace(sorted_key, ek);
1173  // Insert the value under the original key, so the fast path lookup will work
1174  // if the user uses the same order of inputs, outputs, and targets again.
1175  executors_.emplace(key, insert_result.first->second);
1176  *executors_and_keys = insert_result.first->second.get();
1177 
1178  return Status::OK();
1179 }
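
To make the executor caching above concrete: for inputs {"x:0"}, outputs {"y:0"}, target node {"init"} (hypothetical names), a non-partial run, and no debug watches, the fast-path key built from the caller's ordering would be

    // "x:0->y:0/init/0/"

The sorted key has the same shape with each name list sorted, so requests that differ only in argument order end up sharing a single ExecutorsAndKeys entry.
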
1180 
1181 Status NTSession::CreateGraphs(
1182  const BuildGraphOptions& subgraph_options,
1183  std::unordered_map<string, std::unique_ptr<Graph>>* outputs,
1184  std::unique_ptr<FunctionLibraryDefinition>* flib_def,
1185  RunStateArgs* run_state_args, DataTypeVector* input_types,
1186  DataTypeVector* output_types) {
1187  mutex_lock l(graph_def_lock_);
1188  std::unique_ptr<SimpleClientGraph> client_graph;
1189 
1190  std::unique_ptr<SimpleGraphExecutionState> temp_exec_state_holder;
1191  SimpleGraphExecutionState* execution_state = nullptr;
1192  if (options_.config.graph_options().place_pruned_graph()) {
1193  // Because we are placing pruned graphs, we need to create a
1194  // new SimpleGraphExecutionState for every new unseen graph,
1195  // and then place it.
1196  SimpleGraphExecutionStateOptions prune_options;
1197  prune_options.device_set = &device_set_;
1198  prune_options.session_options = &options_;
1199  prune_options.stateful_placements = stateful_placements_;
1200  TF_RETURN_IF_ERROR(SimpleGraphExecutionState::MakeForPrunedGraph(
1201  execution_state_->original_graph_def().library(), prune_options,
1202  execution_state_->original_graph_def(), subgraph_options,
1203  &temp_exec_state_holder, &client_graph));
1204  execution_state = temp_exec_state_holder.get();
1205  } else {
1206  execution_state = execution_state_.get();
1207  TF_RETURN_IF_ERROR(
1208  execution_state->BuildGraph(subgraph_options, &client_graph));
1209  }
1210 
1211  if (subgraph_options.feed_endpoints.size() !=
1212  client_graph->feed_types.size()) {
1213  return errors::Internal(
1214  "Graph pruning failed: requested number of feed endpoints = ",
1215  subgraph_options.feed_endpoints.size(),
1216  " versus number of pruned feed endpoints = ",
1217  client_graph->feed_types.size());
1218  }
1219  if (subgraph_options.fetch_endpoints.size() !=
1220  client_graph->fetch_types.size()) {
1221  return errors::Internal(
1222  "Graph pruning failed: requested number of fetch endpoints = ",
1223  subgraph_options.fetch_endpoints.size(),
1224  " versus number of pruned fetch endpoints = ",
1225  client_graph->fetch_types.size());
1226  }
1227 
1228  auto current_stateful_placements = execution_state->GetStatefulPlacements();
1229  // Update our current state based on the execution_state's
1230  // placements. If there are any mismatches for a node,
1231  // we should fail, as this should never happen.
1232  for (auto placement_pair : current_stateful_placements) {
1233  const string& node_name = placement_pair.first;
1234  const string& placement = placement_pair.second;
1235  auto iter = stateful_placements_.find(node_name);
1236  if (iter == stateful_placements_.end()) {
1237  stateful_placements_.insert(std::make_pair(node_name, placement));
1238  } else if (iter->second != placement) {
1239  return errors::Internal(
1240  "Stateful placement mismatch. "
1241  "Current assignment of ",
1242  node_name, " to ", iter->second, " does not match ", placement);
1243  }
1244  }
1245 
1246  stateful_placements_ = execution_state->GetStatefulPlacements();
1247 
1248  // Remember the graph in run state if this is a partial run.
1249  if (run_state_args->is_partial_run) {
1250  run_state_args->graph.reset(new Graph(flib_def_.get()));
1251  CopyGraph(*execution_state->full_graph(), run_state_args->graph.get());
1252  }
1253 
1254  // Partition the graph across devices.
1255  PartitionOptions popts;
1256  popts.node_to_loc = [](const Node* node) {
1257  assert(node != nullptr);
1258  return node->assigned_device_name();
1259  };
1260  popts.new_name = [this](const string& prefix) {
1261  return strings::StrCat(prefix, "/_", edge_name_counter_.fetch_add(1));
1262  };
1263  popts.get_incarnation = [](const string& name) {
1264  // The direct session does not have changing incarnation numbers.
1265  // Just return '1'.
1266  return 1;
1267  };
1268  popts.control_flow_added = false;
1269 
1270  std::unordered_map<string, GraphDef> partitions;
1271  TF_RETURN_IF_ERROR(Partition(popts, &client_graph->graph, &partitions));
1272 
1273  std::vector<string> device_names;
1274  for (auto device : devices_) {
1275  // Extract the LocalName from the device.
1276  device_names.push_back(DeviceNameUtils::LocalName(device->name()));
1277  }
1278 
1279  // Check for valid partitions.
1280  for (const auto& partition : partitions) {
1281  const string local_partition_name =
1282  DeviceNameUtils::LocalName(partition.first);
1283  if (std::count(device_names.begin(), device_names.end(),
1284  local_partition_name) == 0) {
1285  return errors::InvalidArgument(
1286  "Creating a partition for ", local_partition_name,
1287  " which doesn't exist in the list of available devices. Available "
1288  "devices: ",
1289  str_util::Join(device_names, ","));
1290  }
1291  }
1292 
1293  for (const auto& partition : partitions) {
1294  std::unique_ptr<Graph> device_graph(
1295  new Graph(client_graph->flib_def.get()));
1296  GraphConstructorOptions device_opts;
1297  // There are internal operations (e.g., send/recv) that we now allow.
1298  device_opts.allow_internal_ops = true;
1299  device_opts.expect_device_spec = true;
1300  TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(device_opts, partition.second,
1301  device_graph.get()));
1302  outputs->emplace(partition.first, std::move(device_graph));
1303  }
1304 
1305  GraphOptimizationPassOptions optimization_options;
1306  optimization_options.session_options = &options_;
1307  optimization_options.flib_def = client_graph->flib_def.get();
1308  optimization_options.partition_graphs = outputs;
1309  TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
1310  OptimizationPassRegistry::POST_PARTITIONING, optimization_options));
1311 
1312  Status s;
1313  for (auto& partition : *outputs) {
1314  const string& partition_name = partition.first;
1315  std::unique_ptr<Graph>* graph = &partition.second;
1316 
1317  VLOG(2) << "Created " << DebugString(graph->get()) << " for "
1318  << partition_name;
1319 
1320  // Give the device an opportunity to rewrite its subgraph.
1321  Device* d;
1322  s = device_mgr_->LookupDevice(partition_name, &d);
1323  if (!s.ok()) break;
1324  // TODO(pbar) The library is currently shared and immutable. There
1325  // may be possible use cases where a device may want to modify
1326  // function definitions - in which case the library would need to be
1327  // replicated per device.
1328  s = d->MaybeRewriteGraph(client_graph->flib_def->ToProto(), graph);
1329  if (!s.ok()) {
1330  break;
1331  }
1332  }
1333  *flib_def = std::move(client_graph->flib_def);
1334  std::swap(*input_types, client_graph->feed_types);
1335  std::swap(*output_types, client_graph->fetch_types);
1336  return s;
1337 }
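
A sketch of what the partitioning above produces, assuming a hypothetical two-device placement:

    // Nodes assigned to "/job:localhost/replica:0/task:0/cpu:0" and to a
    // second device yield two entries in `partitions`, keyed by device name.
    // Each GraphDef is converted back into a Graph (send/recv ops allowed)
    // and later paired with its own executor by the caller,
    // GetOrCreateExecutors().
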
1338 
1339 ::tensorflow::Status NTSession::ListDevices(
1340  std::vector<DeviceAttributes>* response) {
1341  response->clear();
1342  response->reserve(devices_.size());
1343  for (Device* d : devices_) {
1344  const DeviceAttributes& attrs = d->attributes();
1345  response->emplace_back(attrs);
1346  }
1347  return ::tensorflow::Status::OK();
1348 }
1349 
1350 ::tensorflow::Status NTSession::Reset(
1351  const std::vector<string>& containers) {
1352  device_mgr_->ClearContainers(containers);
1353  return ::tensorflow::Status::OK();
1354 }
1355 
1356 ::tensorflow::Status NTSession::Close() {
1357  cancellation_manager_->StartCancel();
1358  {
1359  mutex_lock l(closed_lock_);
1360  if (closed_) return ::tensorflow::Status::OK();
1361  closed_ = true;
1362  }
1363  if (factory_ != nullptr) factory_->Deregister(this);
1364  return ::tensorflow::Status::OK();
1365 }
1366 
1367 NTSession::RunState::RunState(
1368  const std::vector<string>& pending_input_names,
1369  const std::vector<string>& pending_output_names, int64 step_id,
1370  const std::vector<Device*>* devices)
1371  : step_container(step_id, [devices](const string& name) {
1372  for (auto d : *devices) {
1373  if (!d->resource_manager()->Cleanup(name).ok()) {
1374  // Do nothing...
1375  }
1376  }
1377  }) {
1378  // Initially all the feeds and fetches are pending.
1379  for (auto& name : pending_input_names) {
1380  pending_inputs[name] = false;
1381  }
1382  for (auto& name : pending_output_names) {
1383  pending_outputs[name] = false;
1384  }
1385 }
1386 
1387 NTSession::RunState::RunState(int64 step_id,
1388  const std::vector<Device*>* devices)
1389  : RunState({}, {}, step_id, devices) {}
1390 
1391 NTSession::RunState::~RunState() {
1392  if (rendez != nullptr) {
1393  if (!executors_done.HasBeenNotified()) {
1394  rendez->StartAbort(errors::Cancelled("PRun cancellation"));
1395  executors_done.WaitForNotification();
1396  }
1397  rendez->Unref();
1398  }
1399 }
1400 
1401 bool NTSession::RunState::PendingDone() const {
1402  for (const auto& it : pending_inputs) {
1403  if (!it.second) return false;
1404  }
1405  for (const auto& it : pending_outputs) {
1406  if (!it.second) return false;
1407  }
1408  return true;
1409 }
1410 
1411 void NTSession::WaitForNotification(RunState* run_state,
1412  CancellationManager* cm,
1413  int64 timeout_in_ms) {
1414  Status status =
1415  WaitForNotification(&run_state->executors_done, timeout_in_ms);
1416  if (!status.ok()) {
1417  {
1418  mutex_lock l(run_state->mu_);
1419  run_state->status.Update(status);
1420  }
1421  cm->StartCancel();
1422  // We must wait for the executors to complete, because they have borrowed
1423  // references to `cm` and other per-step state. After this notification, it
1424  // is safe to clean up the step.
1425  run_state->executors_done.WaitForNotification();
1426  }
1427 }
1428 
1429 ::tensorflow::Status NTSession::WaitForNotification(
1430  Notification* notification, int64 timeout_in_ms) {
1431  if (timeout_in_ms > 0) {
1432  int64 timeout_in_us = timeout_in_ms * 1000;
1433  bool notified = WaitForNotificationWithTimeout(notification, timeout_in_us);
1434  if (!notified) {
1435  return Status(error::DEADLINE_EXCEEDED,
1436  "Timed out waiting for notification");
1437  }
1438  } else {
1439  notification->WaitForNotification();
1440  }
1441  return Status::OK();
1442 }
1443 
1444 } // namespace tensorflow