CMS 3D CMS Logo

throwIfImproperDependencies.cc
Go to the documentation of this file.
1 // -*- ++ -*-
2 //
3 // Package: FWCore/Framework
4 // Function: throwIfImproperDependencies
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: root
10 // Created: Tue, 06 Sep 2016 16:04:28 GMT
11 //
12 
13 // system include files
14 
15 // user include files
18 
19 
20 #include "boost/graph/graph_traits.hpp"
21 #include "boost/graph/adjacency_list.hpp"
22 #include "boost/graph/depth_first_search.hpp"
23 #include "boost/graph/visitors.hpp"
24 
25 namespace {
26  //====================================
27  // checkForCorrectness algorithm
28  //
29  // The code creates a 'dependency' graph between all
30  // modules. A module depends on another module if
31  // 1) it 'consumes' data produced by that module
32  // 2) it appears directly after the module within a Path
33  //
34  // If there is a cycle in the 'dependency' graph then
35  // the schedule may be unrunnable. The schedule is still
36  // runnable if all cycles have at least two edges which
37  // connect modules only by Path dependencies (i.e. not
38  // linked by a data dependency).
39  //
40  // Example 1:
41  // C consumes data from B
42  // Path 1: A + B + C
43  // Path 2: B + C + A
44  //
45  // Cycle: A after C [p2], C consumes B, B after A [p1]
46  // Since this cycle has 2 path only edges it is OK since
47  // A and (B+C) are independent so their run order doesn't matter
48  //
49  // Example 2:
50  // B consumes A
51  // C consumes B
52  // Path: C + A
53  //
54  // Cycle: A after C [p], C consumes B, B consumes A
55  // Since this cycle has 1 path only edge it is unrunnable.
56  //
57  // Example 3:
58  // A consumes B
59  // B consumes C
60  // C consumes A
61  // (no Path since unscheduled execution)
62  //
63  // Cycle: A consumes B, B consumes C, C consumes A
64  // Since this cycle has 0 path only edges it is unrunnable.
65  //====================================
66 
67  typedef std::pair<unsigned int, unsigned int> SimpleEdge;
68  typedef std::map<SimpleEdge, std::vector<unsigned int>> EdgeToPathMap;
69 
70  typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS> Graph;
71 
72  typedef boost::graph_traits<Graph>::edge_descriptor Edge;
73 
74  template<typename T>
75  std::unordered_set<T> intersect(std::unordered_set<T> const& iLHS, std::unordered_set<T> const& iRHS) {
76  std::unordered_set<T> result;
77  if(iLHS.size() < iRHS.size()) {
78  result.reserve(iLHS.size());
79  for(auto const& l: iLHS) {
80  if(iRHS.find(l) != iRHS.end()) {
81  result.insert(l);
82  }
83  }
84  return result;
85  }
86  result.reserve(iRHS.size());
87  for(auto const& r: iRHS) {
88  if(iLHS.find(r) != iLHS.end()) {
89  result.insert(r);
90  }
91  }
92  return result;
93  }
94 
95  struct cycle_detector : public boost::dfs_visitor<> {
96 
97  static const unsigned int kRootVertexIndex=0;
98 
99  cycle_detector(EdgeToPathMap const& iEdgeToPathMap,
100  std::vector<std::vector<unsigned int>> const& iPathIndexToModuleIndexOrder,
101  std::vector<std::string> const& iPathNames,
102  std::unordered_map<unsigned int,std::string> const& iModuleIndexToNames):
103  m_edgeToPathMap(iEdgeToPathMap),
104  m_pathIndexToModuleIndexOrder(iPathIndexToModuleIndexOrder),
105  m_pathNames(iPathNames),
106  m_indexToNames(iModuleIndexToNames){}
107 
108  bool compare(Edge const& iLHS, Edge const& iRHS) const;
109 
110  void tree_edge(Edge const& iEdge, Graph const& iGraph) {
111  auto const& index = get(boost::vertex_index, iGraph);
112 
113  auto in = index[source(iEdge,iGraph)];
114  for( auto it = m_stack.begin(); it != m_stack.end(); ++it) {
115  if(in ==index[source(*it,iGraph)]) {
116  //this vertex is now being used to probe a new edge
117  // so we should drop the rest of the tree
118  m_stack.erase(it, m_stack.end());
119  break;
120  }
121  }
122 
123  m_stack.push_back(iEdge);
124  }
125 
126  void finish_vertex(unsigned int iVertex, Graph const& iGraph) {
127  if(not m_stack.empty()) {
128  auto const& index = get(boost::vertex_index, iGraph);
129 
130  if (iVertex ==index[source(m_stack.back(),iGraph)]) {
131  m_stack.pop_back();
132  }
133  }
134  }
135 
136  //Called if a cycle happens
137  void back_edge(Edge const& iEdge, Graph const& iGraph) {
138 
139  auto const& index = get(boost::vertex_index, iGraph);
140 
141  if(kRootVertexIndex != index[source(m_stack.front(),iGraph)]) {
142  //this part of the graph is not connected to data processing
143  return;
144  }
145 
146  m_stack.push_back(iEdge);
147 
148  auto pop_stack = [](std::vector<Edge>* stack) { stack->pop_back(); };
149  std::unique_ptr<std::vector<Edge>,decltype(pop_stack)> guard(&m_stack,pop_stack);
150 
151  //This edge has not been added to the stack yet
152  // making a copy allows us to add it in but not worry
153  // about removing it at the end of the routine
154  std::vector<Edge> tempStack;
155 
156  tempStack= findMinimumCycle(m_stack,iGraph);
157  checkCycleForProblem(tempStack,iGraph);
158  for(auto const& edge: tempStack) {
159  unsigned int in =index[source(edge,iGraph)];
160  unsigned int out =index[target(edge,iGraph)];
161 
162  m_verticiesInFundamentalCycles.insert(in);
163  m_verticiesInFundamentalCycles.insert(out);
164  }
165 
166  //NOTE: Need to remove any 'extra' bits at beginning of stack
167  // which may not be part of the cycle
168  m_fundamentalCycles.emplace_back(std::move(tempStack));
169  }
170 
171  void forward_or_cross_edge(Edge iEdge, Graph const& iGraph)
172  {
173 
175  IndexMap const& index = get(boost::vertex_index, iGraph);
176 
177  if(kRootVertexIndex != index[source(m_stack.front(),iGraph)]) {
178  //this part of the graph is not connected to data processing
179  return;
180  }
181 
182  const unsigned int out =index[target(iEdge,iGraph)];
183 
184  //If this is a crossing edge whose out vertex is part of a fundamental cycle
185  // then this path is also part of a cycle
186  if(m_verticiesInFundamentalCycles.end() == m_verticiesInFundamentalCycles.find(out)) {
187  return;
188  }
189 
190  for(auto const& cycle: m_fundamentalCycles) {
191  //Is the out vertex in this cycle?
192  auto itStartMatch = cycle.end();
193  for(auto it =cycle.begin(); it!= cycle.end(); ++it) {
194  unsigned int inCycle =index[source(*it,iGraph)];
195 
196  if(out == inCycle) {
197  itStartMatch = it;
198  break;
199  }
200  }
201  if(itStartMatch == cycle.end()) {
202  //this cycle isn't the one which uses the vertex from the stack
203  continue;
204  }
205 
206  //tempStack will hold a stack that could have been found by depth first
207  // search if module to index ordering had been different
208  m_stack.push_back(iEdge);
209  auto pop_stack = [](std::vector<Edge>* stack) { stack->pop_back(); };
210  std::unique_ptr<std::vector<Edge>,decltype(pop_stack)> guard(&m_stack,pop_stack);
211  auto tempStack= findMinimumCycle(m_stack,iGraph);
212 
213  //the set of 'in' verticies presently in the stack is used to find where an 'out'
214  // vertex from the fundamental cycle connects into the present stack
215  std::set<unsigned int> verticiesInStack;
216  for(auto const& edge: tempStack) {
217  verticiesInStack.insert(index[source(edge,iGraph)]);
218  }
219 
220 
221 
222  //Now find place in the fundamental cycle that attaches to the stack
223  // First see if that happens later in the stack
224  auto itLastMatch = cycle.end();
225  for(auto it=itStartMatch; it != cycle.end(); ++it) {
226  unsigned int outCycle =index[target(*it,iGraph)];
227  if(verticiesInStack.end() != verticiesInStack.find(outCycle)) {
228  itLastMatch = it;
229  break;
230  }
231  }
232  if(itLastMatch==cycle.end()) {
233  //See if we can find the attachment to the stack earlier in the cycle
234  tempStack.insert(tempStack.end(),itStartMatch,cycle.end());
235  for(auto it=cycle.begin(); it != itStartMatch;++it) {
236  unsigned int outCycle =index[target(*it,iGraph)];
237  if(verticiesInStack.end() != verticiesInStack.find(outCycle)) {
238  itLastMatch = it;
239  break;
240  }
241  }
242  if( itLastMatch==cycle.end()) {
243  //need to use the full cycle
244  //NOTE: this should just retest the same cycle but starting
245  // from a different position. If everything is correct, then
246  // this should also pass so in principal we could return here.
247  //However, as long as this isn't a performance problem, having
248  // this additional check could catch problems in the algorithm.
249  tempStack.insert(tempStack.end(),cycle.begin(),itStartMatch);
250  } else {
251  tempStack.insert(tempStack.end(),cycle.begin(),itLastMatch+1);
252  }
253  } else {
254  if( (itStartMatch == cycle.begin()) and (cycle.end() == (itLastMatch+1)) ) {
255  //This is just the entire cycle starting where we've already started
256  // before. Given the cycle was OK before, it would also be OK this time
257  return;
258  }
259  tempStack.insert(tempStack.end(),itStartMatch,itLastMatch+1);
260  }
261 
262  tempStack= findMinimumCycle(tempStack,iGraph);
263  checkCycleForProblem(tempStack,iGraph);
264  }
265 
266  }
267  private:
268  std::string const& pathName(unsigned int iIndex) const {
269  return m_pathNames[iIndex];
270  }
271 
272  std::string const& moduleName(unsigned int iIndex) const {
273  auto itFound =m_indexToNames.find(iIndex);
274  assert(itFound != m_indexToNames.end());
275  return itFound->second;
276  }
277 
278  void
279  throwOnError(std::vector<Edge>const& iEdges,
281  Graph const& iGraph) const {
282  std::stringstream oStream;
283  oStream <<"Module run order problem found: \n";
284  bool first_edge = true;
285  for(auto const& edge: iEdges) {
286  unsigned int in =iIndex[source(edge,iGraph)];
287  unsigned int out =iIndex[target(edge,iGraph)];
288 
289  if(first_edge) {
290  first_edge = false;
291  } else {
292  oStream<<", ";
293  }
294  oStream <<moduleName(in);
295 
296  auto iFound = m_edgeToPathMap.find(SimpleEdge(in,out));
297  bool pathDependencyOnly = true;
298  for(auto dependency : iFound->second) {
299  if (dependency == edm::graph::kDataDependencyIndex) {
300  pathDependencyOnly = false;
301  break;
302  }
303  }
304  if (pathDependencyOnly) {
305  oStream <<" after "<<moduleName(out)<<" [path "<<pathName(iFound->second[0])<<"]";
306  } else {
307  oStream <<" consumes "<<moduleName(out);
308  }
309  }
310  oStream<<"\n Running in the threaded framework would lead to indeterminate results."
311  "\n Please change order of modules in mentioned Path(s) to avoid inconsistent module ordering.";
312 
313  throw edm::Exception(edm::errors::ScheduleExecutionFailure, "Unrunnable schedule\n")
314  << oStream.str() << "\n";
315  }
316 
317  std::vector<Edge> findMinimumCycle(std::vector<Edge> const& iCycleEdges,
318  Graph const& iGraph) const {
319  //Remove unnecessary edges
320  // The graph library scans the verticies so we have edges in the list which are
321  // not part of the cycle but are associated to a vertex contributes to the cycle.
322  // To find these unneeded edges we work backwards on the edge list looking for cases
323  // where the 'in' on the previous edge is not the 'out' for the next edge. When this
324  // happens we know that there are additional edges for that same 'in' which can be
325  // removed.
326 
328  IndexMap const& index = get(boost::vertex_index, iGraph);
329 
330  std::vector<Edge> reducedEdges;
331  reducedEdges.reserve(iCycleEdges.size());
332  reducedEdges.push_back(iCycleEdges.back());
333  unsigned int lastIn = index[source(iCycleEdges.back(),iGraph)];
334  const unsigned int finalVertex = index[target(iCycleEdges.back(),iGraph)];
335  for( auto it = iCycleEdges.rbegin()+1; it != iCycleEdges.rend(); ++it) {
336  unsigned int in =index[source(*it,iGraph)];
337  unsigned int out =index[target(*it,iGraph)];
338  if(lastIn == out) {
339  reducedEdges.push_back(*it);
340  lastIn = in;
341  if(in == finalVertex) {
342  break;
343  }
344  }
345  }
346  std::reverse(reducedEdges.begin(), reducedEdges.end());
347 
348  return reducedEdges;
349  }
350 
351  void checkCycleForProblem(std::vector<Edge> const& iCycleEdges,
352  Graph const& iGraph) {
353  //For a real problem, we need at least one data dependency
354  // we already know we originate from a path because all tests
355  // require starting from the root node which connects to all paths
356  bool hasDataDependency =false;
357  //Since we are dealing with a circle, we initialize the 'last' info with the end of the graph
359  IndexMap const& index = get(boost::vertex_index, iGraph);
360 
361  unsigned int lastIn =index[source(iCycleEdges.back(),iGraph)];
362  unsigned int lastOut = index[target(iCycleEdges.back(),iGraph)];
363  bool lastEdgeHasDataDepencency = false;
364 
365  std::unordered_set<unsigned int> lastPathsSeen;
366 
367  //If a data dependency appears to make us jump off a path but that module actually
368  // appears on the path that was left, we need to see if we later come back to that
369  // path somewhere before that module. If not than it is a false cycle
370  std::unordered_multimap<unsigned int, unsigned int> pathToModulesWhichMustAppearLater;
371  bool moduleAppearedEarlierInPath = false;
372 
373  for(auto dependency: m_edgeToPathMap.find(SimpleEdge(lastIn,lastOut))->second) {
374  if(dependency !=edm::graph::kDataDependencyIndex) {
375  lastPathsSeen.insert(dependency);
376  } else {
377  lastEdgeHasDataDepencency = true;
378  }
379  }
380  //Need to check that the
381  bool minimumInitialPathsSet = false;
382  std::unordered_set<unsigned int> initialPaths(lastPathsSeen);
383  std::unordered_set<unsigned int> sharedPaths;
384  for(auto const& edge: iCycleEdges) {
385  unsigned int in =index[source(edge,iGraph)];
386  unsigned int out =index[target(edge,iGraph)];
387 
388  auto iFound = m_edgeToPathMap.find(SimpleEdge(in,out));
389  std::unordered_set<unsigned int> pathsOnEdge;
390  bool edgeHasDataDependency = false;
391  for(auto dependency : iFound->second) {
392  if (dependency == edm::graph::kDataDependencyIndex) {
393  //need to count only if this moves us to a new path
394  hasDataDependency = true;
395  edgeHasDataDependency = true;
396  } else {
397  pathsOnEdge.insert(dependency);
398 
399  auto const& pathIndicies = m_pathIndexToModuleIndexOrder[dependency];
400  auto pathToCheckRange = pathToModulesWhichMustAppearLater.equal_range(dependency);
401  for(auto it = pathToCheckRange.first; it != pathToCheckRange.second; ) {
402  auto moduleIDToCheck = it->second;
403  if(moduleIDToCheck == in or moduleIDToCheck==out) {
404  auto toErase = it;
405  ++it;
406  pathToModulesWhichMustAppearLater.erase(toErase);
407  continue;
408  }
409  bool alreadyAdvanced = false;
410  for(auto index : pathIndicies) {
411  if(index == out) {
412  //we must have skipped over the module so the earlier worry about the
413  // module being called on the path was wrong
414  auto toErase = it;
415  ++it;
416  alreadyAdvanced =true;
417  pathToModulesWhichMustAppearLater.erase(toErase);
418  break;
419  }
420  if(index == moduleIDToCheck) {
421  //module still earlier on the path
422  break;
423  }
424  }
425  if(not alreadyAdvanced) {
426  ++it;
427  }
428  }
429  }
430  }
431  sharedPaths = intersect(pathsOnEdge,lastPathsSeen);
432  if(sharedPaths.empty() ) {
433  minimumInitialPathsSet = true;
434  if((not edgeHasDataDependency) and (not lastEdgeHasDataDepencency) and (not lastPathsSeen.empty())) {
435  //If we jumped from one path to another without a data dependency
436  // than the cycle is just because two independent modules were
437  // scheduled in different arbitrary order on different paths
438  return;
439  }
440  if(edgeHasDataDependency and not lastPathsSeen.empty()) {
441  //If the paths we were on had this module we are going to earlier
442  // on their paths than we do not have a real cycle
443  bool atLeastOnePathFailed = false;
444  std::vector<unsigned int> pathsToWatch;
445  pathsToWatch.reserve(lastPathsSeen.size());
446  for(auto seenPath: lastPathsSeen) {
447  if(pathsOnEdge.end() == pathsOnEdge.find(seenPath)) {
448  //we left this path so we now need to see if the module 'out'
449  // is on this path ahead of the module 'in'
450  bool foundOut = false;
451  for(auto index: m_pathIndexToModuleIndexOrder[seenPath]) {
452  if(index == out) {
453  foundOut = true;
454  pathsToWatch.push_back(seenPath);
455  }
456  if(index == lastOut) {
457  if(not foundOut) {
458  atLeastOnePathFailed = true;
459  }
460  break;
461  }
462  if(atLeastOnePathFailed) {
463  break;
464  }
465  }
466  }
467  }
468  //If all the paths have the module earlier in their paths
469  // then there was no need to jump between paths to get it
470  // and this breaks the data cycle
471  if(not atLeastOnePathFailed) {
472  moduleAppearedEarlierInPath = true;
473  for(auto p: pathsToWatch) {
474  pathToModulesWhichMustAppearLater.emplace(p,out);
475  }
476  }
477  }
478  lastPathsSeen = pathsOnEdge;
479  } else {
480  lastPathsSeen = sharedPaths;
481  if (not minimumInitialPathsSet) {
482  initialPaths = sharedPaths;
483  }
484  }
485  lastOut = out;
486  lastEdgeHasDataDepencency =edgeHasDataDependency;
487  }
488  if(moduleAppearedEarlierInPath and not pathToModulesWhichMustAppearLater.empty()) {
489  return;
490  }
491  if(not hasDataDependency) {
492  return;
493  }
494  if( (not initialPaths.empty()) and intersect(initialPaths,sharedPaths).empty() ) {
495  //The effective start and end paths for the first graph
496  // node do not match. This can happen if the node
497  // appears on multiple paths
498  return;
499  }
500  throwOnError(iCycleEdges,index,iGraph);
501 
502  }
503 
504 
505  EdgeToPathMap const& m_edgeToPathMap;
506  std::vector<std::vector<unsigned int>> const& m_pathIndexToModuleIndexOrder;
507  std::vector<std::string> const& m_pathNames;
508  std::unordered_map<unsigned int,std::string> m_indexToNames;
509  std::unordered_map<unsigned int, std::vector<unsigned int>> m_pathToModuleIndex;
510 
511  std::vector<Edge> m_stack;
512  std::vector<std::vector<Edge>> m_fundamentalCycles;
513  std::set<unsigned int> m_verticiesInFundamentalCycles;
514  };
515 }
516 
517 
518 
519 void
520 edm::graph::throwIfImproperDependencies(EdgeToPathMap const& iEdgeToPathMap,
521  std::vector<std::vector<unsigned int>> const& iPathIndexToModuleIndexOrder,
522  std::vector<std::string> const& iPathNames,
523  std::unordered_map<unsigned int, std::string> const& iModuleIndexToNames) {
524 
525  //Now use boost graph library to find cycles in the dependencies
526  std::vector<SimpleEdge> outList;
527  outList.reserve(iEdgeToPathMap.size());
528  for(auto const& edgeInfo: iEdgeToPathMap) {
529  outList.push_back(edgeInfo.first);
530  }
531 
532  Graph g(outList.begin(),outList.end(), iModuleIndexToNames.size());
533 
534  cycle_detector detector(iEdgeToPathMap,iPathIndexToModuleIndexOrder,iPathNames,iModuleIndexToNames);
535  boost::depth_first_search(g,boost::visitor(detector));
536 }
type
Definition: HCALResponse.h:21
bool compare(const P &i, const P &j)
void throwIfImproperDependencies(EdgeToPathMap const &, std::vector< std::vector< unsigned int >> const &iPathIndexToModuleIndexOrder, std::vector< std::string > const &iPathNames, std::unordered_map< unsigned int, std::string > const &iModuleIndexToNames)
std::map< SimpleEdge, std::vector< unsigned int >> EdgeToPathMap
constexpr auto kDataDependencyIndex
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e g
Definition: Activities.doc:4
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
stack
Definition: svgfig.py:558
static std::string const source
Definition: EdmProvDump.cc:43
std::pair< unsigned int, unsigned int > SimpleEdge
def move(src, dest)
Definition: eostools.py:510