CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
DQMFileIterator.cc
Go to the documentation of this file.
1 #include "DQMFileIterator.h"
4 
5 #include <boost/regex.hpp>
6 #include <boost/format.hpp>
7 #include <boost/range.hpp>
8 #include <boost/filesystem.hpp>
9 #include <boost/algorithm/string/predicate.hpp>
10 
11 #include <memory>
12 #include <string>
13 #include <iterator>
14 #include <boost/property_tree/json_parser.hpp>
15 #include <boost/property_tree/ptree.hpp>
16 
17 namespace dqmservices {
18 
20  const std::string& filename, int lumiNumber, unsigned int datafn_position) {
21  boost::property_tree::ptree pt;
22  read_json(filename, pt);
23 
25  lumi.filename = filename;
26 
27  // We rely on n_events being the item at index 1 (the second entry) of the "data" array...
28  lumi.n_events = std::next(pt.get_child("data").begin(), 1)
29  ->second.get_value<std::size_t>();
30 
31  lumi.file_ls = lumiNumber;
32  lumi.datafn = std::next(pt.get_child("data").begin(), datafn_position)
33  ->second.get_value<std::string>();
34 
35  return lumi;
36 }
37 
38 // Contents of the EoR json file are ignored for the moment.
39 // This function is currently not called.
41  const std::string& filename) {
42  boost::property_tree::ptree pt;
43  read_json(filename, pt);
44 
45  EorEntry eor;
46  eor.filename = filename;
47 
48  // We rely on n_events and n_lumi being the items at index 1 and 2 of the "data" array...
49  eor.n_events = std::next(pt.get_child("data").begin(), 1)
50  ->second.get_value<std::size_t>();
51  eor.n_lumi = std::next(pt.get_child("data").begin(), 2)
52  ->second.get_value<std::size_t>();
53 
54  eor.loaded = true;
55  return eor;
56 }
57 
59  : state_(EOR) {
60 
61  runNumber_ = pset.getUntrackedParameter<unsigned int>("runNumber");
62  datafnPosition_ = pset.getUntrackedParameter<unsigned int>("datafnPosition");
63  runInputDir_ = pset.getUntrackedParameter<std::string>("runInputDir");
64  streamLabel_ = pset.getUntrackedParameter<std::string>("streamLabel");
65  delayMillis_ = pset.getUntrackedParameter<uint32_t>("delayMillis");
67  pset.getUntrackedParameter<int32_t>("nextLumiTimeoutMillis");
68 
70  reset();
71 }
72 
74 
76  runPath_ = str(boost::format("%s/run%06d") % runInputDir_ % runNumber_);
77 
78  eor_.loaded = false;
79  state_ = State::OPEN;
80  nextLumiNumber_ = 1;
81  lumiSeen_.clear();
82  filesSeen_.clear();
83 
85 
86  collect(true);
87  update_state();
88 
89  if (mon_.isAvailable()) {
90  ptree doc;
91  doc.put("run", runNumber_);
92  doc.put("next_lumi", nextLumiNumber_);
93  doc.put("fi_state", std::to_string(state_));
94  mon_->outputUpdate(doc);
95  }
96 
97 }
98 
100 
103  advanceToLumi(nextLumiNumber_ + 1, "open: file iterator");
104  return lumi;
105 }
106 
108  if (lumiSeen_.find(nextLumiNumber_) != lumiSeen_.end()) {
109  return true;
110  }
111 
112  return false;
113 }
114 
// Accessor: returns the run number stored in runNumber_.
115 unsigned int DQMFileIterator::runNumber() { return runNumber_; }
116 
118  if (!lumiSeen_.empty()) {
119  return lumiSeen_.rbegin()->first;
120  }
121 
122  return 1;
123 }
124 
// Tags already-seen lumi entries with `reason` and reports the current
// next-lumi number to the monitoring service.
//
// Every entry of lumiSeen_ whose key is >= the value nextLumiNumber_ had on
// entry and < the value nextLumiNumber_ has when the loop runs gets its
// state set to `reason` and is pushed through monUpdateLumi().
//
// Parameters:
//   lumi   - target lumisection number (see the review note below).
//   reason - text copied into the `state` field of each affected entry.
//
// NOTE(review): this listing jumps from line 130 to line 133, so two
// statements of the body are not visible here. Presumably they assign
// `lumi` to nextLumiNumber_ (and possibly refresh lastLumiLoad_); as shown,
// `lumi` is never read and the loop range would be empty. Confirm against
// the real source file before relying on this description.
125 void DQMFileIterator::advanceToLumi(unsigned int lumi, std::string reason) {
126  using boost::property_tree::ptree;
127  using boost::str;
128 
  // remember where we were before nextLumiNumber_ is moved
129  unsigned int currentLumi = nextLumiNumber_;
130 
133 
  // first seen lumi at or after the old position
134  auto iter = lumiSeen_.lower_bound(currentLumi);
135 
  // walk every seen lumi below the (new) nextLumiNumber_ and record why it
  // was passed over
136  while ((iter != lumiSeen_.end()) && ((iter->first) < nextLumiNumber_)) {
137  iter->second.state = reason;
138  monUpdateLumi(iter->second);
139 
140  ++iter;
141  }
142 
  // the monitoring service is optional; only publish when it is available
143  if (mon_.isAvailable()) {
144  // report the successful lumi file open
145  ptree doc;
146  doc.put("next_lumi", nextLumiNumber_);
147  mon_->outputUpdate(doc);
148  }
149 }
150 
152  if (! mon_.isAvailable())
153  return;
154 
155  ptree doc;
156  doc.put(str(boost::format("extra.lumi_seen.lumi%06d") % lumi.file_ls), lumi.state);
157  mon_->outputUpdate(doc);
158 }
159 
161  if (boost::starts_with(fn, "/")) return fn;
162 
164  p /= fn;
165  return p.string();
166 }
167 
// Scans the run directory (runPath_) for lumisection json files and for the
// end-of-run (EoR) marker file, and loads every newly found lumi file that
// belongs to this run and stream into lumiSeen_.
//
// Parameters:
//   ignoreTimers - when true, both early-return checks below (the 100 ms
//                  throttle and the "directory unchanged" shortcut) are
//                  bypassed and the directory is always scanned.
//
// File names are matched against run<digits>_ls<digits>_<label>[_...].jsn.
// A json file that fails to load is logged and removed from filesSeen_, so
// a later scan will try it again.
//
// NOTE(review): this listing omits lines 172, 174 and 192 of the original
// file, so the expression that initialises last_ms is incomplete here and
// at least one statement after the mtime update is not visible. Presumably
// these deal with the runPathLastCollect_ timestamp - confirm against the
// real source file.
168 void DQMFileIterator::collect(bool ignoreTimers) {
169  // search filesystem to find available lumi section files
170  // or the end of run files
171 
173  auto last_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
175 
176  // don't refresh if it's too soon
177  if ((!ignoreTimers) && (last_ms >= 0) && (last_ms < 100)) {
178  return;
179  }
180 
181  // check if directory changed
182  std::time_t mtime_now = boost::filesystem::last_write_time(runPath_);
183 
  // skip the scan when the directory mtime is unchanged, unless the
  // forced re-check interval (forceFileCheckTimeoutMillis_) has elapsed
184  if ((!ignoreTimers) && (last_ms < forceFileCheckTimeoutMillis_) && (mtime_now == runPathMTime_)) {
185  //logFileAction("Directory hasn't changed.");
186  return;
187  } else {
188  //logFileAction("Directory changed, updating.");
189  }
190 
191  runPathMTime_ = mtime_now;
193 
194  using boost::filesystem::directory_iterator;
195  using boost::filesystem::directory_entry;
196 
  // path of the EoR file, if one is found during this scan; it is handled
  // only after the loop so that all lumi files are processed first
197  std::string fn_eor;
198 
199  directory_iterator dend;
200  for (directory_iterator di(runPath_); di != dend; ++di) {
  // NOTE(review): the regex object is constructed once per directory entry
201  const boost::regex fn_re("run(\\d+)_ls(\\d+)_([a-zA-Z0-9]+)(_.*)?\\.jsn");
202 
  // filename: last path component (used as the filesSeen_ key)
  // fn: full path (used for loading and logging)
203  const std::string filename = di->path().filename().string();
204  const std::string fn = di->path().string();
205 
  // already examined in an earlier scan
206  if (filesSeen_.find(filename) != filesSeen_.end()) {
207  continue;
208  }
209 
210  boost::smatch result;
211  if (boost::regex_match(filename, result, fn_re)) {
  // capture groups: 1 = run number, 2 = lumi number, 3 = stream label
212  unsigned int run = std::stoi(result[1]);
213  unsigned int lumi = std::stoi(result[2]);
214  std::string label = result[3];
215 
  // mark as examined before the filters below, so rejected files are
  // not re-parsed on every scan
216  filesSeen_.insert(filename);
217 
  // file belongs to a different run
218  if (run != runNumber_) continue;
219 
220  // check if this is EoR
221  // for various reasons we have to load it after all other files
222  if ((lumi == 0) && (label == "EoR") && (!eor_.loaded)) {
223  fn_eor = fn;
224  continue;
225  }
226 
227  // check if lumi is loaded
228  if (lumiSeen_.find(lumi) != lumiSeen_.end()) {
229  continue; // already loaded
230  }
231 
232  // check if this belongs to us
233  if (label != streamLabel_) {
234  std::string msg("Found and skipped json file (stream label mismatch, ");
235  msg += label + " [files] != " + streamLabel_ + " [config]";
236  msg += "): ";
237  logFileAction(msg, fn);
238  continue;
239  }
240 
241  try {
242  LumiEntry lumi_jsn = LumiEntry::load_json(fn, lumi, datafnPosition_);
243  lumiSeen_.emplace(lumi, lumi_jsn);
244  logFileAction("Found and loaded json file: ", fn);
245 
246  monUpdateLumi(lumi_jsn);
247  } catch (const std::exception& e) {
248  // don't reset the mtime, keep it waiting
  // forget the file so that a later scan attempts the load again
249  filesSeen_.erase(filename);
250 
251  std::string msg("Found, tried to load the json, but failed (");
252  msg += e.what();
253  msg += "): ";
254  logFileAction(msg, fn);
255  }
256  }
257  }
258 
259  if (!fn_eor.empty()) {
260  logFileAction("EoR file found: ", fn_eor);
261 
262  // @TODO load EoR files correctly
263  // eor_ = EorEntry::load_json(fn_eor);
264  // logFileAction("Loaded eor file: ", fn_eor);
265 
266  // for now, set n_lumi to the highest _found_ lumi
267  eor_.loaded = true;
268 
  // lumiSeen_ is an ordered map, so rbegin() is the largest lumi number
269  if (lumiSeen_.empty()) {
270  eor_.n_lumi = 0;
271  } else {
272  eor_.n_lumi = lumiSeen_.rbegin()->first;
273  }
274  }
275 }
276 
278  using std::chrono::high_resolution_clock;
279  using std::chrono::duration_cast;
280  using std::chrono::milliseconds;
281 
282  collect(false);
283 
284  // now update the state
285  State old_state = state_;
286 
287  if ((state_ == State::OPEN) && (eor_.loaded)) {
288  state_ = State::EOR_CLOSING;
289  }
290 
291  // special case for missing lumi files
292  // skip to the next available, but after the timeout
293  if ((state_ != State::EOR) && (nextLumiTimeoutMillis_ >= 0)) {
294  auto iter = lumiSeen_.lower_bound(nextLumiNumber_);
295  if ((iter != lumiSeen_.end()) && iter->first != nextLumiNumber_) {
296 
297  auto elapsed = high_resolution_clock::now() - lastLumiLoad_;
298  auto elapsed_ms = duration_cast<milliseconds>(elapsed).count();
299 
300  if (elapsed_ms >= nextLumiTimeoutMillis_) {
301  std::string msg("Timeout reached, skipping lumisection(s) ");
302  msg += std::to_string(nextLumiNumber_) + " .. " +
303  std::to_string(iter->first - 1);
304  msg += ", nextLumiNumber_ is now " + std::to_string(iter->first);
305  logFileAction(msg);
306 
307  advanceToLumi(iter->first, "skipped: timeout");
308  }
309  }
310  }
311 
312  if (state_ == State::EOR_CLOSING) {
313  // check if we parsed all lumis
314  // n_lumi is both last lumi and the number of lumi
315  // since lumis are indexed from 1
316 
317  // after all lumi have been pop()'ed
318  // current lumi will become larger than the last lumi
319  if (nextLumiNumber_ > eor_.n_lumi) {
320  state_ = State::EOR;
321  }
322  }
323 
324  if (state_ != old_state) {
325  logFileAction("Streamer state changed: ",
326  std::to_string(old_state) + "->" + std::to_string(state_));
327 
328  if (mon_) {
329  ptree doc;
330  doc.put("fi_state", std::to_string(state_));
331  mon_->outputUpdate(doc);
332  }
333  }
334 }
335 
337  const std::string& fileName) const {
338  edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay()
339  << " " << msg << fileName;
341 }
342 
344  if (lumiSeen_.find(lumi.file_ls) != lumiSeen_.end()) {
345  lumiSeen_[lumi.file_ls].state = msg;
346 
348  } else {
349  logFileAction("Internal error: referenced lumi is not the map.");
350  }
351 }
352 
354  if (mon_.isAvailable())
355  mon_->keepAlive();
356 
357  usleep(delayMillis_ * 1000);
358 }
359 
361 
362  desc.addUntracked<unsigned int>("runNumber")
363  ->setComment("Run number passed via configuration file.");
364 
365  desc.addUntracked<unsigned int>("datafnPosition", 3)
366  ->setComment("Data filename position in the positional arguments array 'data' in json file.");
367 
368  desc.addUntracked<std::string>("streamLabel")
369  ->setComment("Stream label used in json discovery.");
370 
371  desc.addUntracked<uint32_t>("delayMillis")
372  ->setComment("Number of milliseconds to wait between file checks.");
373 
374  desc.addUntracked<int32_t>("nextLumiTimeoutMillis", -1)->setComment(
375  "Number of milliseconds to wait before switching to the next lumi "
376  "section if the current is missing, -1 to disable.");
377 
378  desc.addUntracked<std::string>("runInputDir")
379  ->setComment("Directory where the DQM files will appear.");
380 }
381 
382 } /* end of namespace */
T getUntrackedParameter(std::string const &, T const &) const
std::string make_path(const std::string &fn)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
void FlushMessageLog()
edm::Service< DQMMonitoringService > mon_
void logLumiState(const LumiEntry &lumi, const std::string &msg)
tuple lumi
Definition: fjr2json.py:35
std::map< unsigned int, LumiEntry > lumiSeen_
string format
Some error handling for the usage.
U second(std::pair< T, U > const &p)
std::string to_string(const T &t)
Definition: Logger.cc:26
DQMFileIterator(edm::ParameterSet const &pset)
tuple result
Definition: query.py:137
void logFileAction(const std::string &msg, const std::string &fileName="") const
static EorEntry load_json(const std::string &filename)
std::chrono::high_resolution_clock::time_point lastLumiLoad_
static LumiEntry load_json(const std::string &filename, int lumiNumber, unsigned int datafn_position)
tuple filename
Definition: lut2db_cfg.py:20
void monUpdateLumi(const LumiEntry &lumi)
std::chrono::high_resolution_clock::time_point runPathLastCollect_
std::unordered_set< std::string > filesSeen_
void advanceToLumi(unsigned int lumi, std::string reason)
static void fillDescription(edm::ParameterSetDescription &d)
void collect(bool ignoreTimers)