CMS 3D CMS Logo

throwIfImproperDependencies.cc
Go to the documentation of this file.
1 // -*- ++ -*-
2 //
3 // Package: FWCore/Framework
4 // Function: throwIfImproperDependencies
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: root
10 // Created: Tue, 06 Sep 2016 16:04:28 GMT
11 //
12 
13 // system include files
14 
15 // user include files
18 
19 #include "boost/graph/graph_traits.hpp"
20 #include "boost/graph/adjacency_list.hpp"
21 #include "boost/graph/depth_first_search.hpp"
22 #include "boost/graph/visitors.hpp"
23 
24 namespace {
25  //====================================
26  // checkForCorrectness algorithm
27  //
28  // The code creates a 'dependency' graph between all
29  // modules. A module depends on another module if
30  // 1) it 'consumes' data produced by that module
31  // 2) it appears directly after the module within a Path
32  //
33  // If there is a cycle in the 'dependency' graph then
34  // the schedule may be unrunnable. The schedule is still
35  // runnable if all cycles have at least two edges which
36  // connect modules only by Path dependencies (i.e. not
37  // linked by a data dependency).
38  //
39  // Example 1:
40  // C consumes data from B
41  // Path 1: A + B + C
42  // Path 2: B + C + A
43  //
44  // Cycle: A after C [p2], C consumes B, B after A [p1]
45  // Since this cycle has 2 path only edges it is OK since
46  // A and (B+C) are independent so their run order doesn't matter
47  //
48  // Example 2:
49  // B consumes A
50  // C consumes B
51  // Path: C + A
52  //
53  // Cycle: A after C [p], C consumes B, B consumes A
54  // Since this cycle has 1 path only edge it is unrunnable.
55  //
56  // Example 3:
57  // A consumes B
58  // B consumes C
59  // C consumes A
60  // (no Path since unscheduled execution)
61  //
62  // Cycle: A consumes B, B consumes C, C consumes A
63  // Since this cycle has 0 path only edges it is unrunnable.
64  //====================================
65 
66  typedef std::pair<unsigned int, unsigned int> SimpleEdge;
67  typedef std::map<SimpleEdge, std::vector<unsigned int>> EdgeToPathMap;
68 
69  typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS> Graph;
70 
71  typedef boost::graph_traits<Graph>::edge_descriptor Edge;
72 
73  template <typename T>
74  std::unordered_set<T> intersect(std::unordered_set<T> const& iLHS, std::unordered_set<T> const& iRHS) {
75  std::unordered_set<T> result;
76  if (iLHS.size() < iRHS.size()) {
77  result.reserve(iLHS.size());
78  for (auto const& l : iLHS) {
79  if (iRHS.find(l) != iRHS.end()) {
80  result.insert(l);
81  }
82  }
83  return result;
84  }
85  result.reserve(iRHS.size());
86  for (auto const& r : iRHS) {
87  if (iLHS.find(r) != iLHS.end()) {
88  result.insert(r);
89  }
90  }
91  return result;
92  }
93 
94  struct cycle_detector : public boost::dfs_visitor<> {
95  static const unsigned int kRootVertexIndex = 0;
96 
97  cycle_detector(EdgeToPathMap const& iEdgeToPathMap,
98  std::vector<std::vector<unsigned int>> const& iPathIndexToModuleIndexOrder,
99  std::vector<std::string> const& iPathNames,
100  std::unordered_map<unsigned int, std::string> const& iModuleIndexToNames)
101  : m_edgeToPathMap(iEdgeToPathMap),
102  m_pathIndexToModuleIndexOrder(iPathIndexToModuleIndexOrder),
103  m_pathNames(iPathNames),
104  m_indexToNames(iModuleIndexToNames) {}
105 
106  bool compare(Edge const& iLHS, Edge const& iRHS) const;
107 
108  void tree_edge(Edge const& iEdge, Graph const& iGraph) {
109  auto const& index = get(boost::vertex_index, iGraph);
110 
111  auto in = index[source(iEdge, iGraph)];
112  for (auto it = m_stack.begin(); it != m_stack.end(); ++it) {
113  if (in == index[source(*it, iGraph)]) {
114  //this vertex is now being used to probe a new edge
115  // so we should drop the rest of the tree
116  m_stack.erase(it, m_stack.end());
117  break;
118  }
119  }
120 
121  m_stack.push_back(iEdge);
122  }
123 
124  void finish_vertex(unsigned int iVertex, Graph const& iGraph) {
125  if (not m_stack.empty()) {
126  auto const& index = get(boost::vertex_index, iGraph);
127 
128  if (iVertex == index[source(m_stack.back(), iGraph)]) {
129  m_stack.pop_back();
130  }
131  }
132  }
133 
134  //Called if a cycle happens
135  void back_edge(Edge const& iEdge, Graph const& iGraph) {
136  auto const& index = get(boost::vertex_index, iGraph);
137 
138  if (kRootVertexIndex != index[source(m_stack.front(), iGraph)]) {
139  //this part of the graph is not connected to data processing
140  return;
141  }
142 
143  m_stack.push_back(iEdge);
144 
145  auto pop_stack = [](std::vector<Edge>* stack) { stack->pop_back(); };
146  std::unique_ptr<std::vector<Edge>, decltype(pop_stack)> guard(&m_stack, pop_stack);
147 
148  //This edge has not been added to the stack yet
149  // making a copy allows us to add it in but not worry
150  // about removing it at the end of the routine
151  std::vector<Edge> tempStack;
152 
153  tempStack = findMinimumCycle(m_stack, iGraph);
154  checkCycleForProblem(tempStack, iGraph);
155  for (auto const& edge : tempStack) {
156  unsigned int in = index[source(edge, iGraph)];
157  unsigned int out = index[target(edge, iGraph)];
158 
159  m_verticiesInFundamentalCycles.insert(in);
160  m_verticiesInFundamentalCycles.insert(out);
161  }
162 
163  //NOTE: Need to remove any 'extra' bits at beginning of stack
164  // which may not be part of the cycle
165  m_fundamentalCycles.emplace_back(std::move(tempStack));
166  }
167 
168  void forward_or_cross_edge(Edge iEdge, Graph const& iGraph) {
170  IndexMap const& index = get(boost::vertex_index, iGraph);
171 
172  if (kRootVertexIndex != index[source(m_stack.front(), iGraph)]) {
173  //this part of the graph is not connected to data processing
174  return;
175  }
176 
177  const unsigned int out = index[target(iEdge, iGraph)];
178 
179  //If this is a crossing edge whose out vertex is part of a fundamental cycle
180  // then this path is also part of a cycle
181  if (m_verticiesInFundamentalCycles.end() == m_verticiesInFundamentalCycles.find(out)) {
182  return;
183  }
184 
185  for (auto const& cycle : m_fundamentalCycles) {
186  //Is the out vertex in this cycle?
187  auto itStartMatch = cycle.end();
188  for (auto it = cycle.begin(); it != cycle.end(); ++it) {
189  unsigned int inCycle = index[source(*it, iGraph)];
190 
191  if (out == inCycle) {
192  itStartMatch = it;
193  break;
194  }
195  }
196  if (itStartMatch == cycle.end()) {
197  //this cycle isn't the one which uses the vertex from the stack
198  continue;
199  }
200 
201  //tempStack will hold a stack that could have been found by depth first
202  // search if module to index ordering had been different
203  m_stack.push_back(iEdge);
204  auto pop_stack = [](std::vector<Edge>* stack) { stack->pop_back(); };
205  std::unique_ptr<std::vector<Edge>, decltype(pop_stack)> guard(&m_stack, pop_stack);
206  auto tempStack = findMinimumCycle(m_stack, iGraph);
207 
208  //the set of 'in' verticies presently in the stack is used to find where an 'out'
209  // vertex from the fundamental cycle connects into the present stack
210  std::set<unsigned int> verticiesInStack;
211  for (auto const& edge : tempStack) {
212  verticiesInStack.insert(index[source(edge, iGraph)]);
213  }
214 
215  //Now find place in the fundamental cycle that attaches to the stack
216  // First see if that happens later in the stack
217  auto itLastMatch = cycle.end();
218  for (auto it = itStartMatch; it != cycle.end(); ++it) {
219  unsigned int outCycle = index[target(*it, iGraph)];
220  if (verticiesInStack.end() != verticiesInStack.find(outCycle)) {
221  itLastMatch = it;
222  break;
223  }
224  }
225  if (itLastMatch == cycle.end()) {
226  //See if we can find the attachment to the stack earlier in the cycle
227  tempStack.insert(tempStack.end(), itStartMatch, cycle.end());
228  for (auto it = cycle.begin(); it != itStartMatch; ++it) {
229  unsigned int outCycle = index[target(*it, iGraph)];
230  if (verticiesInStack.end() != verticiesInStack.find(outCycle)) {
231  itLastMatch = it;
232  break;
233  }
234  }
235  if (itLastMatch == cycle.end()) {
236  //need to use the full cycle
237  //NOTE: this should just retest the same cycle but starting
238  // from a different position. If everything is correct, then
239  // this should also pass so in principal we could return here.
240  //However, as long as this isn't a performance problem, having
241  // this additional check could catch problems in the algorithm.
242  tempStack.insert(tempStack.end(), cycle.begin(), itStartMatch);
243  } else {
244  tempStack.insert(tempStack.end(), cycle.begin(), itLastMatch + 1);
245  }
246  } else {
247  if ((itStartMatch == cycle.begin()) and (cycle.end() == (itLastMatch + 1))) {
248  //This is just the entire cycle starting where we've already started
249  // before. Given the cycle was OK before, it would also be OK this time
250  return;
251  }
252  tempStack.insert(tempStack.end(), itStartMatch, itLastMatch + 1);
253  }
254 
255  tempStack = findMinimumCycle(tempStack, iGraph);
256  checkCycleForProblem(tempStack, iGraph);
257  }
258  }
259 
260  private:
261  std::string const& pathName(unsigned int iIndex) const { return m_pathNames[iIndex]; }
262 
263  std::string const& moduleName(unsigned int iIndex) const {
264  auto itFound = m_indexToNames.find(iIndex);
265  assert(itFound != m_indexToNames.end());
266  return itFound->second;
267  }
268 
269  void throwOnError(std::vector<Edge> const& iEdges,
271  Graph const& iGraph) const {
272  std::stringstream oStream;
273  oStream << "Module run order problem found: \n";
274  bool first_edge = true;
275  for (auto const& edge : iEdges) {
276  unsigned int in = iIndex[source(edge, iGraph)];
277  unsigned int out = iIndex[target(edge, iGraph)];
278 
279  if (first_edge) {
280  first_edge = false;
281  } else {
282  oStream << ", ";
283  }
284  oStream << moduleName(in);
285 
286  auto iFound = m_edgeToPathMap.find(SimpleEdge(in, out));
287  bool pathDependencyOnly = true;
288  for (auto dependency : iFound->second) {
289  if (dependency == edm::graph::kDataDependencyIndex) {
290  pathDependencyOnly = false;
291  break;
292  }
293  }
294  if (pathDependencyOnly) {
295  oStream << " after " << moduleName(out) << " [path " << pathName(iFound->second[0]) << "]";
296  } else {
297  oStream << " consumes " << moduleName(out);
298  }
299  }
300  oStream << "\n Running in the threaded framework would lead to indeterminate results."
301  "\n Please change order of modules in mentioned Path(s) to avoid inconsistent module ordering.";
302 
303  throw edm::Exception(edm::errors::ScheduleExecutionFailure, "Unrunnable schedule\n") << oStream.str() << "\n";
304  }
305 
306  std::vector<Edge> findMinimumCycle(std::vector<Edge> const& iCycleEdges, Graph const& iGraph) const {
307  //Remove unnecessary edges
308  // The graph library scans the verticies so we have edges in the list which are
309  // not part of the cycle but are associated to a vertex contributes to the cycle.
310  // To find these unneeded edges we work backwards on the edge list looking for cases
311  // where the 'in' on the previous edge is not the 'out' for the next edge. When this
312  // happens we know that there are additional edges for that same 'in' which can be
313  // removed.
314 
316  IndexMap const& index = get(boost::vertex_index, iGraph);
317 
318  std::vector<Edge> reducedEdges;
319  reducedEdges.reserve(iCycleEdges.size());
320  reducedEdges.push_back(iCycleEdges.back());
321  unsigned int lastIn = index[source(iCycleEdges.back(), iGraph)];
322  const unsigned int finalVertex = index[target(iCycleEdges.back(), iGraph)];
323  for (auto it = iCycleEdges.rbegin() + 1; it != iCycleEdges.rend(); ++it) {
324  unsigned int in = index[source(*it, iGraph)];
325  unsigned int out = index[target(*it, iGraph)];
326  if (lastIn == out) {
327  reducedEdges.push_back(*it);
328  lastIn = in;
329  if (in == finalVertex) {
330  break;
331  }
332  }
333  }
334  std::reverse(reducedEdges.begin(), reducedEdges.end());
335 
336  return reducedEdges;
337  }
338 
339  void checkCycleForProblem(std::vector<Edge> const& iCycleEdges, Graph const& iGraph) {
340  //For a real problem, we need at least one data dependency
341  // we already know we originate from a path because all tests
342  // require starting from the root node which connects to all paths
343  bool hasDataDependency = false;
344  //Since we are dealing with a circle, we initialize the 'last' info with the end of the graph
346  IndexMap const& index = get(boost::vertex_index, iGraph);
347 
348  unsigned int lastIn = index[source(iCycleEdges.back(), iGraph)];
349  unsigned int lastOut = index[target(iCycleEdges.back(), iGraph)];
350  bool lastEdgeHasDataDepencency = false;
351 
352  std::unordered_set<unsigned int> lastPathsSeen;
353 
354  //If a data dependency appears to make us jump off a path but that module actually
355  // appears on the path that was left, we need to see if we later come back to that
356  // path somewhere before that module. If not than it is a false cycle
357  std::unordered_multimap<unsigned int, unsigned int> pathToModulesWhichMustAppearLater;
358  bool moduleAppearedEarlierInPath = false;
359 
360  for (auto dependency : m_edgeToPathMap.find(SimpleEdge(lastIn, lastOut))->second) {
361  if (dependency != edm::graph::kDataDependencyIndex) {
362  lastPathsSeen.insert(dependency);
363  } else {
364  lastEdgeHasDataDepencency = true;
365  }
366  }
367  //Need to check that the
368  bool minimumInitialPathsSet = false;
369  std::unordered_set<unsigned int> initialPaths(lastPathsSeen);
370  std::unordered_set<unsigned int> sharedPaths;
371  for (auto const& edge : iCycleEdges) {
372  unsigned int in = index[source(edge, iGraph)];
373  unsigned int out = index[target(edge, iGraph)];
374 
375  auto iFound = m_edgeToPathMap.find(SimpleEdge(in, out));
376  std::unordered_set<unsigned int> pathsOnEdge;
377  bool edgeHasDataDependency = false;
378  for (auto dependency : iFound->second) {
379  if (dependency == edm::graph::kDataDependencyIndex) {
380  //need to count only if this moves us to a new path
381  hasDataDependency = true;
382  edgeHasDataDependency = true;
383  } else {
384  pathsOnEdge.insert(dependency);
385 
386  auto const& pathIndicies = m_pathIndexToModuleIndexOrder[dependency];
387  auto pathToCheckRange = pathToModulesWhichMustAppearLater.equal_range(dependency);
388  for (auto it = pathToCheckRange.first; it != pathToCheckRange.second;) {
389  auto moduleIDToCheck = it->second;
390  if (moduleIDToCheck == in or moduleIDToCheck == out) {
391  auto toErase = it;
392  ++it;
393  pathToModulesWhichMustAppearLater.erase(toErase);
394  continue;
395  }
396  bool alreadyAdvanced = false;
397  for (auto index : pathIndicies) {
398  if (index == out) {
399  //we must have skipped over the module so the earlier worry about the
400  // module being called on the path was wrong
401  auto toErase = it;
402  ++it;
403  alreadyAdvanced = true;
404  pathToModulesWhichMustAppearLater.erase(toErase);
405  break;
406  }
407  if (index == moduleIDToCheck) {
408  //module still earlier on the path
409  break;
410  }
411  }
412  if (not alreadyAdvanced) {
413  ++it;
414  }
415  }
416  }
417  }
418  sharedPaths = intersect(pathsOnEdge, lastPathsSeen);
419  if (sharedPaths.empty()) {
420  minimumInitialPathsSet = true;
421  if ((not edgeHasDataDependency) and (not lastEdgeHasDataDepencency) and (not lastPathsSeen.empty())) {
422  //If we jumped from one path to another without a data dependency
423  // than the cycle is just because two independent modules were
424  // scheduled in different arbitrary order on different paths
425  return;
426  }
427  if (edgeHasDataDependency and not lastPathsSeen.empty()) {
428  //If the paths we were on had this module we are going to earlier
429  // on their paths than we do not have a real cycle
430  bool atLeastOnePathFailed = false;
431  std::vector<unsigned int> pathsToWatch;
432  pathsToWatch.reserve(lastPathsSeen.size());
433  for (auto seenPath : lastPathsSeen) {
434  if (pathsOnEdge.end() == pathsOnEdge.find(seenPath)) {
435  //we left this path so we now need to see if the module 'out'
436  // is on this path ahead of the module 'in'
437  bool foundOut = false;
438  for (auto index : m_pathIndexToModuleIndexOrder[seenPath]) {
439  if (index == out) {
440  foundOut = true;
441  pathsToWatch.push_back(seenPath);
442  }
443  if (index == lastOut) {
444  if (not foundOut) {
445  atLeastOnePathFailed = true;
446  }
447  break;
448  }
449  if (atLeastOnePathFailed) {
450  break;
451  }
452  }
453  }
454  }
455  //If all the paths have the module earlier in their paths
456  // then there was no need to jump between paths to get it
457  // and this breaks the data cycle
458  if (not atLeastOnePathFailed) {
459  moduleAppearedEarlierInPath = true;
460  for (auto p : pathsToWatch) {
461  pathToModulesWhichMustAppearLater.emplace(p, out);
462  }
463  }
464  }
465  lastPathsSeen = pathsOnEdge;
466  } else {
467  lastPathsSeen = sharedPaths;
468  if (not minimumInitialPathsSet) {
469  initialPaths = sharedPaths;
470  }
471  }
472  lastOut = out;
473  lastEdgeHasDataDepencency = edgeHasDataDependency;
474  }
475  if (moduleAppearedEarlierInPath and not pathToModulesWhichMustAppearLater.empty()) {
476  return;
477  }
478  if (not hasDataDependency) {
479  return;
480  }
481  if ((not initialPaths.empty()) and intersect(initialPaths, sharedPaths).empty()) {
482  //The effective start and end paths for the first graph
483  // node do not match. This can happen if the node
484  // appears on multiple paths
485  return;
486  }
487  throwOnError(iCycleEdges, index, iGraph);
488  }
489 
490  EdgeToPathMap const& m_edgeToPathMap;
491  std::vector<std::vector<unsigned int>> const& m_pathIndexToModuleIndexOrder;
492  std::vector<std::string> const& m_pathNames;
493  std::unordered_map<unsigned int, std::string> m_indexToNames;
494  std::unordered_map<unsigned int, std::vector<unsigned int>> m_pathToModuleIndex;
495 
496  std::vector<Edge> m_stack;
497  std::vector<std::vector<Edge>> m_fundamentalCycles;
498  std::set<unsigned int> m_verticiesInFundamentalCycles;
499  };
500 } // namespace
501 
502 void edm::graph::throwIfImproperDependencies(EdgeToPathMap const& iEdgeToPathMap,
503  std::vector<std::vector<unsigned int>> const& iPathIndexToModuleIndexOrder,
504  std::vector<std::string> const& iPathNames,
505  std::unordered_map<unsigned int, std::string> const& iModuleIndexToNames) {
506  //Now use boost graph library to find cycles in the dependencies
507  std::vector<SimpleEdge> outList;
508  outList.reserve(iEdgeToPathMap.size());
509  for (auto const& edgeInfo : iEdgeToPathMap) {
510  outList.push_back(edgeInfo.first);
511  }
512 
513  Graph g(outList.begin(), outList.end(), iModuleIndexToNames.size());
514 
515  cycle_detector detector(iEdgeToPathMap, iPathIndexToModuleIndexOrder, iPathNames, iModuleIndexToNames);
516  boost::depth_first_search(g, boost::visitor(detector));
517 }
type
Definition: HCALResponse.h:21
bool compare(const P &i, const P &j)
void throwIfImproperDependencies(EdgeToPathMap const &, std::vector< std::vector< unsigned int >> const &iPathIndexToModuleIndexOrder, std::vector< std::string > const &iPathNames, std::unordered_map< unsigned int, std::string > const &iModuleIndexToNames)
std::map< SimpleEdge, std::vector< unsigned int >> EdgeToPathMap
constexpr auto kDataDependencyIndex
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e g
Definition: Activities.doc:4
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
stack
Definition: svgfig.py:559
static std::string const source
Definition: EdmProvDump.cc:47
DDCompactView::Graph Graph
std::pair< unsigned int, unsigned int > SimpleEdge
def move(src, dest)
Definition: eostools.py:511