CMS 3D CMS Logo

throwIfImproperDependencies.cc
Go to the documentation of this file.
1 // -*- ++ -*-
2 //
3 // Package: FWCore/Framework
4 // Function: throwIfImproperDependencies
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: root
10 // Created: Tue, 06 Sep 2016 16:04:28 GMT
11 //
12 
13 // system include files
14 
15 // user include files
18 
19 
20 #include "boost/graph/graph_traits.hpp"
21 #include "boost/graph/adjacency_list.hpp"
22 #include "boost/graph/depth_first_search.hpp"
23 #include "boost/graph/visitors.hpp"
24 
25 namespace {
26  //====================================
27  // checkForCorrectness algorithm
28  //
29  // The code creates a 'dependency' graph between all
30  // modules. A module depends on another module if
31  // 1) it 'consumes' data produced by that module
32  // 2) it appears directly after the module within a Path
33  //
34  // If there is a cycle in the 'dependency' graph then
35  // the schedule may be unrunnable. The schedule is still
36  // runnable if all cycles have at least two edges which
37  // connect modules only by Path dependencies (i.e. not
38  // linked by a data dependency).
39  //
40  // Example 1:
41  // C consumes data from B
42  // Path 1: A + B + C
43  // Path 2: B + C + A
44  //
45  // Cycle: A after C [p2], C consumes B, B after A [p1]
46  // Since this cycle has 2 path only edges it is OK since
47  // A and (B+C) are independent so their run order doesn't matter
48  //
49  // Example 2:
50  // B consumes A
51  // C consumes B
52  // Path: C + A
53  //
54  // Cycle: A after C [p], C consumes B, B consumes A
55  // Since this cycle has 1 path only edge it is unrunnable.
56  //
57  // Example 3:
58  // A consumes B
59  // B consumes C
60  // C consumes A
61  // (no Path since unscheduled execution)
62  //
63  // Cycle: A consumes B, B consumes C, C consumes A
64  // Since this cycle has 0 path only edges it is unrunnable.
65  //====================================
66 
67  typedef std::pair<unsigned int, unsigned int> SimpleEdge;
68  typedef std::map<SimpleEdge, std::vector<unsigned int>> EdgeToPathMap;
69 
70  typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS> Graph;
71 
72  typedef boost::graph_traits<Graph>::edge_descriptor Edge;
73  struct cycle_detector : public boost::dfs_visitor<> {
74 
75  static const unsigned int kRootVertexIndex=0;
76 
77  cycle_detector(EdgeToPathMap const& iEdgeToPathMap,
78  std::vector<std::vector<unsigned int>> const& iPathIndexToModuleIndexOrder,
79  std::vector<std::string> const& iPathNames,
80  std::unordered_map<unsigned int,std::string> const& iModuleIndexToNames):
81  m_edgeToPathMap(iEdgeToPathMap),
82  m_pathIndexToModuleIndexOrder(iPathIndexToModuleIndexOrder),
83  m_pathNames(iPathNames),
84  m_indexToNames(iModuleIndexToNames){}
85 
86  bool compare(Edge const& iLHS, Edge const& iRHS) const;
87 
88  void tree_edge(Edge const& iEdge, Graph const& iGraph) {
89  auto const& index = get(boost::vertex_index, iGraph);
90 
91  auto in = index[source(iEdge,iGraph)];
92  for( auto it = m_stack.begin(); it != m_stack.end(); ++it) {
93  if(in ==index[source(*it,iGraph)]) {
94  //this vertex is now being used to probe a new edge
95  // so we should drop the rest of the tree
96  m_stack.erase(it, m_stack.end());
97  break;
98  }
99  }
100 
101  m_stack.push_back(iEdge);
102  }
103 
104  void finish_vertex(unsigned int iVertex, Graph const& iGraph) {
105  if(not m_stack.empty()) {
106  auto const& index = get(boost::vertex_index, iGraph);
107 
108  if (iVertex ==index[source(m_stack.back(),iGraph)]) {
109  m_stack.pop_back();
110  }
111  }
112  }
113 
114  //Called if a cycle happens
115  void back_edge(Edge const& iEdge, Graph const& iGraph) {
116 
117  auto const& index = get(boost::vertex_index, iGraph);
118 
119  if(kRootVertexIndex != index[source(m_stack.front(),iGraph)]) {
120  //this part of the graph is not connected to data processing
121  return;
122  }
123 
124  m_stack.push_back(iEdge);
125 
126  auto pop_stack = [](std::vector<Edge>* stack) { stack->pop_back(); };
127  std::unique_ptr<std::vector<Edge>,decltype(pop_stack)> guard(&m_stack,pop_stack);
128 
129  //This edge has not been added to the stack yet
130  // making a copy allows us to add it in but not worry
131  // about removing it at the end of the routine
132  std::vector<Edge> tempStack;
133 
134  tempStack= findMinimumCycle(m_stack,iGraph);
135  checkCycleForProblem(tempStack,iGraph);
136  for(auto const& edge: tempStack) {
137  unsigned int in =index[source(edge,iGraph)];
138  unsigned int out =index[target(edge,iGraph)];
139 
140  m_verticiesInFundamentalCycles.insert(in);
141  m_verticiesInFundamentalCycles.insert(out);
142  }
143 
144  //NOTE: Need to remove any 'extra' bits at beginning of stack
145  // which may not be part of the cycle
146  m_fundamentalCycles.emplace_back(std::move(tempStack));
147  }
148 
149  void forward_or_cross_edge(Edge iEdge, Graph const& iGraph)
150  {
151 
153  IndexMap const& index = get(boost::vertex_index, iGraph);
154 
155  if(kRootVertexIndex != index[source(m_stack.front(),iGraph)]) {
156  //this part of the graph is not connected to data processing
157  return;
158  }
159 
160  const unsigned int out =index[target(iEdge,iGraph)];
161 
162  //If this is a crossing edge whose out vertex is part of a fundamental cycle
163  // then this path is also part of a cycle
164  if(m_verticiesInFundamentalCycles.end() == m_verticiesInFundamentalCycles.find(out)) {
165  return;
166  }
167 
168  for(auto const& cycle: m_fundamentalCycles) {
169  //Is the out vertex in this cycle?
170  auto itStartMatch = cycle.end();
171  for(auto it =cycle.begin(); it!= cycle.end(); ++it) {
172  unsigned int inCycle =index[source(*it,iGraph)];
173 
174  if(out == inCycle) {
175  itStartMatch = it;
176  break;
177  }
178  }
179  if(itStartMatch == cycle.end()) {
180  //this cycle isn't the one which uses the vertex from the stack
181  continue;
182  }
183 
184  //tempStack will hold a stack that could have been found by depth first
185  // search if module to index ordering had been different
186  m_stack.push_back(iEdge);
187  auto pop_stack = [](std::vector<Edge>* stack) { stack->pop_back(); };
188  std::unique_ptr<std::vector<Edge>,decltype(pop_stack)> guard(&m_stack,pop_stack);
189  auto tempStack= findMinimumCycle(m_stack,iGraph);
190 
191  //the set of 'in' verticies presently in the stack is used to find where an 'out'
192  // vertex from the fundamental cycle connects into the present stack
193  std::set<unsigned int> verticiesInStack;
194  for(auto const& edge: tempStack) {
195  verticiesInStack.insert(index[source(edge,iGraph)]);
196  }
197 
198 
199 
200  //Now find place in the fundamental cycle that attaches to the stack
201  // First see if that happens later in the stack
202  auto itLastMatch = cycle.end();
203  for(auto it=itStartMatch; it != cycle.end(); ++it) {
204  unsigned int outCycle =index[target(*it,iGraph)];
205  if(verticiesInStack.end() != verticiesInStack.find(outCycle)) {
206  itLastMatch = it;
207  break;
208  }
209  }
210  if(itLastMatch==cycle.end()) {
211  //See if we can find the attachment to the stack earlier in the cycle
212  tempStack.insert(tempStack.end(),itStartMatch,cycle.end());
213  for(auto it=cycle.begin(); it != itStartMatch;++it) {
214  unsigned int outCycle =index[target(*it,iGraph)];
215  if(verticiesInStack.end() != verticiesInStack.find(outCycle)) {
216  itLastMatch = it;
217  break;
218  }
219  }
220  if( itLastMatch==cycle.end()) {
221  //need to use the full cycle
222  //NOTE: this should just retest the same cycle but starting
223  // from a different position. If everything is correct, then
224  // this should also pass so in principal we could return here.
225  //However, as long as this isn't a performance problem, having
226  // this additional check could catch problems in the algorithm.
227  tempStack.insert(tempStack.end(),cycle.begin(),itStartMatch);
228  } else {
229  tempStack.insert(tempStack.end(),cycle.begin(),itLastMatch+1);
230  }
231  } else {
232  if( (itStartMatch == cycle.begin()) and (cycle.end() == (itLastMatch+1)) ) {
233  //This is just the entire cycle starting where we've already started
234  // before. Given the cycle was OK before, it would also be OK this time
235  return;
236  }
237  tempStack.insert(tempStack.end(),itStartMatch,itLastMatch+1);
238  }
239 
240  tempStack= findMinimumCycle(tempStack,iGraph);
241  checkCycleForProblem(tempStack,iGraph);
242  }
243 
244  }
245  private:
246  std::string const& pathName(unsigned int iIndex) const {
247  return m_pathNames[iIndex];
248  }
249 
250  std::string const& moduleName(unsigned int iIndex) const {
251  auto itFound =m_indexToNames.find(iIndex);
252  assert(itFound != m_indexToNames.end());
253  return itFound->second;
254  }
255 
256  void
257  throwOnError(std::vector<Edge>const& iEdges,
259  Graph const& iGraph) const {
260  std::stringstream oStream;
261  oStream <<"Module run order problem found: \n";
262  bool first_edge = true;
263  for(auto const& edge: iEdges) {
264  unsigned int in =iIndex[source(edge,iGraph)];
265  unsigned int out =iIndex[target(edge,iGraph)];
266 
267  if(first_edge) {
268  first_edge = false;
269  } else {
270  oStream<<", ";
271  }
272  oStream <<moduleName(in);
273 
274  auto iFound = m_edgeToPathMap.find(SimpleEdge(in,out));
275  bool pathDependencyOnly = true;
276  for(auto dependency : iFound->second) {
277  if (dependency == edm::graph::kDataDependencyIndex) {
278  pathDependencyOnly = false;
279  break;
280  }
281  }
282  if (pathDependencyOnly) {
283  oStream <<" after "<<moduleName(out)<<" [path "<<pathName(iFound->second[0])<<"]";
284  } else {
285  oStream <<" consumes "<<moduleName(out);
286  }
287  }
288  oStream<<"\n Running in the threaded framework would lead to indeterminate results."
289  "\n Please change order of modules in mentioned Path(s) to avoid inconsistent module ordering.";
290 
291  throw edm::Exception(edm::errors::ScheduleExecutionFailure, "Unrunnable schedule\n")
292  << oStream.str() << "\n";
293  }
294 
295  std::vector<Edge> findMinimumCycle(std::vector<Edge> const& iCycleEdges,
296  Graph const& iGraph) const {
297  //Remove unnecessary edges
298  // The graph library scans the verticies so we have edges in the list which are
299  // not part of the cycle but are associated to a vertex contributes to the cycle.
300  // To find these unneeded edges we work backwards on the edge list looking for cases
301  // where the 'in' on the previous edge is not the 'out' for the next edge. When this
302  // happens we know that there are additional edges for that same 'in' which can be
303  // removed.
304 
306  IndexMap const& index = get(boost::vertex_index, iGraph);
307 
308  std::vector<Edge> reducedEdges;
309  reducedEdges.reserve(iCycleEdges.size());
310  reducedEdges.push_back(iCycleEdges.back());
311  unsigned int lastIn = index[source(iCycleEdges.back(),iGraph)];
312  const unsigned int finalVertex = index[target(iCycleEdges.back(),iGraph)];
313  for( auto it = iCycleEdges.rbegin()+1; it != iCycleEdges.rend(); ++it) {
314  unsigned int in =index[source(*it,iGraph)];
315  unsigned int out =index[target(*it,iGraph)];
316  if(lastIn == out) {
317  reducedEdges.push_back(*it);
318  lastIn = in;
319  if(in == finalVertex) {
320  break;
321  }
322  }
323  }
324  std::reverse(reducedEdges.begin(), reducedEdges.end());
325 
326  return reducedEdges;
327  }
328 
329  void checkCycleForProblem(std::vector<Edge> const& iCycleEdges,
330  Graph const& iGraph) {
331  //For a real problem, we need at least one data dependency
332  // we already know we originate from a path because all tests
333  // require starting from the root node which connects to all paths
334  bool hasDataDependency =false;
335  //Since we are dealing with a circle, we initialize the 'last' info with the end of the graph
337  IndexMap const& index = get(boost::vertex_index, iGraph);
338 
339  unsigned int lastIn =index[source(iCycleEdges.back(),iGraph)];
340  unsigned int lastOut = index[target(iCycleEdges.back(),iGraph)];
341  bool lastEdgeHasDataDepencency = false;
342 
343  std::unordered_set<unsigned int> lastPathsSeen;
344 
345  //If a data dependency appears to make us jump off a path but that module actually
346  // appears on the path that was left, we need to see if we later come back to that
347  // path somewhere before that module. If not than it is a false cycle
348  std::unordered_multimap<unsigned int, unsigned int> pathToModulesWhichMustAppearLater;
349  bool moduleAppearedEarlierInPath = false;
350 
351  for(auto dependency: m_edgeToPathMap.find(SimpleEdge(lastIn,lastOut))->second) {
352  if(dependency !=edm::graph::kDataDependencyIndex) {
353  lastPathsSeen.insert(dependency);
354  } else {
355  lastEdgeHasDataDepencency = true;
356  }
357  }
358  for(auto const& edge: iCycleEdges) {
359  unsigned int in =index[source(edge,iGraph)];
360  unsigned int out =index[target(edge,iGraph)];
361 
362  auto iFound = m_edgeToPathMap.find(SimpleEdge(in,out));
363  std::unordered_set<unsigned int> pathsOnEdge;
364  bool edgeHasDataDependency = false;
365  for(auto dependency : iFound->second) {
366  if (dependency == edm::graph::kDataDependencyIndex) {
367  //need to count only if this moves us to a new path
368  hasDataDependency = true;
369  edgeHasDataDependency = true;
370  } else {
371  pathsOnEdge.insert(dependency);
372 
373  auto const& pathIndicies = m_pathIndexToModuleIndexOrder[dependency];
374  auto pathToCheckRange = pathToModulesWhichMustAppearLater.equal_range(dependency);
375  for(auto it = pathToCheckRange.first; it != pathToCheckRange.second; ) {
376  auto moduleIDToCheck = it->second;
377  if(moduleIDToCheck == in or moduleIDToCheck==out) {
378  auto toErase = it;
379  ++it;
380  pathToModulesWhichMustAppearLater.erase(toErase);
381  continue;
382  }
383  bool alreadyAdvanced = false;
384  for(auto index : pathIndicies) {
385  if(index == out) {
386  //we must have skipped over the module so the earlier worry about the
387  // module being called on the path was wrong
388  auto toErase = it;
389  ++it;
390  alreadyAdvanced =true;
391  pathToModulesWhichMustAppearLater.erase(toErase);
392  break;
393  }
394  if(index == moduleIDToCheck) {
395  //module still earlier on the path
396  break;
397  }
398  }
399  if(not alreadyAdvanced) {
400  ++it;
401  }
402  }
403  }
404  }
405  if((pathsOnEdge != lastPathsSeen) ) {
406  if((not edgeHasDataDependency) and (not lastEdgeHasDataDepencency) and (not lastPathsSeen.empty())) {
407  //If we jumped from one path to another without a data dependency
408  // than the cycle is just because two independent modules were
409  // scheduled in different arbirary order on different paths
410  return;
411  }
412  if(edgeHasDataDependency and not lastPathsSeen.empty()) {
413  //If the paths we were on had this module we are going to earlier
414  // on their paths than we do not have a real cycle
415  bool atLeastOnePathFailed = false;
416  std::vector<unsigned int> pathsToWatch;
417  pathsToWatch.reserve(lastPathsSeen.size());
418  for(auto seenPath: lastPathsSeen) {
419  if(pathsOnEdge.end() == pathsOnEdge.find(seenPath)) {
420  //we left this path so we now need to see if the module 'out'
421  // is on this path ahead of the module 'in'
422  bool foundOut = false;
423  for(auto index: m_pathIndexToModuleIndexOrder[seenPath]) {
424  if(index == out) {
425  foundOut = true;
426  pathsToWatch.push_back(seenPath);
427  }
428  if(index == lastOut) {
429  if(not foundOut) {
430  atLeastOnePathFailed = true;
431  }
432  break;
433  }
434  if(atLeastOnePathFailed) {
435  break;
436  }
437  }
438  }
439  }
440  //If all the paths have the module earlier in their paths
441  // then there was no need to jump between paths to get it
442  // and this breaks the data cycle
443  if(not atLeastOnePathFailed) {
444  moduleAppearedEarlierInPath = true;
445  for(auto p: pathsToWatch) {
446  pathToModulesWhichMustAppearLater.emplace(p,out);
447  }
448  }
449  }
450  lastPathsSeen = pathsOnEdge;
451  lastOut = out;
452  lastEdgeHasDataDepencency =edgeHasDataDependency;
453  }
454  }
455  if(moduleAppearedEarlierInPath and not pathToModulesWhichMustAppearLater.empty()) {
456  return;
457  }
458  if(not hasDataDependency) {
459  return;
460  }
461  throwOnError(iCycleEdges,index,iGraph);
462 
463  }
464 
465 
466  EdgeToPathMap const& m_edgeToPathMap;
467  std::vector<std::vector<unsigned int>> const& m_pathIndexToModuleIndexOrder;
468  std::vector<std::string> const& m_pathNames;
469  std::unordered_map<unsigned int,std::string> m_indexToNames;
470  std::unordered_map<unsigned int, std::vector<unsigned int>> m_pathToModuleIndex;
471 
472  std::vector<Edge> m_stack;
473  std::vector<std::vector<Edge>> m_fundamentalCycles;
474  std::set<unsigned int> m_verticiesInFundamentalCycles;
475  };
476 }
477 
478 
479 
480 void
481 edm::graph::throwIfImproperDependencies(EdgeToPathMap const& iEdgeToPathMap,
482  std::vector<std::vector<unsigned int>> const& iPathIndexToModuleIndexOrder,
483  std::vector<std::string> const& iPathNames,
484  std::unordered_map<unsigned int, std::string> const& iModuleIndexToNames) {
485 
486  //Now use boost graph library to find cycles in the dependencies
487  std::vector<SimpleEdge> outList;
488  outList.reserve(iEdgeToPathMap.size());
489  for(auto const& edgeInfo: iEdgeToPathMap) {
490  outList.push_back(edgeInfo.first);
491  }
492 
493  Graph g(outList.begin(),outList.end(), iModuleIndexToNames.size());
494 
495  cycle_detector detector(iEdgeToPathMap,iPathIndexToModuleIndexOrder,iPathNames,iModuleIndexToNames);
496  boost::depth_first_search(g,boost::visitor(detector));
497 }
type
Definition: HCALResponse.h:21
bool compare(const P &i, const P &j)
void throwIfImproperDependencies(EdgeToPathMap const &, std::vector< std::vector< unsigned int >> const &iPathIndexToModuleIndexOrder, std::vector< std::string > const &iPathNames, std::unordered_map< unsigned int, std::string > const &iModuleIndexToNames)
std::map< SimpleEdge, std::vector< unsigned int >> EdgeToPathMap
constexpr auto kDataDependencyIndex
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e g
Definition: Activities.doc:4
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
stack
Definition: svgfig.py:558
static std::string const source
Definition: EdmProvDump.cc:43
std::pair< unsigned int, unsigned int > SimpleEdge
def move(src, dest)
Definition: eostools.py:510