CMS 3D CMS Logo

WatcherStreamFileReader.cc
Go to the documentation of this file.
5 
6 #include <cerrno>
7 #include <climits>
8 #include <cstdlib>
9 #include <cstdio>
10 #include <cstring>
11 #include <unistd.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <sys/time.h>
15 #include <fcntl.h>
16 #include <libgen.h>
17 #include <fstream>
18 
19 //using namespace edm;
20 using namespace std;
21 
22 //std::string WatcherStreamFileReader::fileName_;
23 
24 #if !defined(__linux__) && !(defined(__APPLE__) && __DARWIN_C_LEVEL >= 200809L)
25 /* getline implementation is copied from glibc. */
26 
27 #ifndef SIZE_MAX
28 #define SIZE_MAX ((size_t)-1)
29 #endif
30 #ifndef SSIZE_MAX
31 #define SSIZE_MAX ((ssize_t)(SIZE_MAX / 2))
32 #endif
// Fallback implementation of getline(), kept file-local by the anonymous
// namespace. It lives inside the preprocessor guard that precedes it, so it is
// only compiled on platforms for which that guard evaluates to true.
33 namespace {
 /*
  * Reads characters from fp up to and including the next '\n' (or up to EOF)
  * into the heap buffer *lineptr, whose capacity is tracked in *n.
  *
  *  - lineptr, n, fp must all be non-null; otherwise errno is set to EINVAL
  *    and -1 is returned.
  *  - When *lineptr is NULL or *n is 0, a fresh 120-byte buffer is malloc'ed.
  *    NOTE(review): a non-null *lineptr paired with *n == 0 is overwritten
  *    without being freed.
  *  - The buffer is grown with realloc as needed; *lineptr and *n are updated.
  *
  * Returns the number of characters stored (newline included, terminating NUL
  * excluded), or -1 when EOF is reached before any character was read or when
  * an allocation fails. On the allocation-failure paths the function jumps
  * straight to `end`, so the buffer is not NUL-terminated in that case.
  */
34  ssize_t getline(char** lineptr, size_t* n, FILE* fp) {
35  ssize_t result = -1;
36  size_t cur_len = 0;
37 
38  if (lineptr == NULL || n == NULL || fp == NULL) {
39  errno = EINVAL;
40  return -1;
41  }
42 
 /* No usable caller buffer: start with a small default allocation. */
43  if (*lineptr == NULL || *n == 0) {
44  *n = 120;
45  *lineptr = (char*)malloc(*n);
46  if (*lineptr == NULL) {
47  result = -1;
48  goto end;
49  }
50  }
51 
52  for (;;) {
53  int i;
54 
55  i = getc(fp);
 /* EOF (or read error): stop; characters read so far are still returned below. */
56  if (i == EOF) {
57  result = -1;
58  break;
59  }
60 
61  /* Make enough space for len+1 (for final NUL) bytes. */
62  if (cur_len + 1 >= *n) {
 /* Largest capacity allowed, so that the length still fits in ssize_t. */
63  size_t needed_max = SSIZE_MAX < SIZE_MAX ? (size_t)SSIZE_MAX + 1 : SIZE_MAX;
64  size_t needed = 2 * *n + 1; /* Be generous. */
65  char* new_lineptr;
66 
67  if (needed_max < needed)
68  needed = needed_max;
 /* Even the capped size cannot hold one more character plus the NUL: give up. */
69  if (cur_len + 1 >= needed) {
70  result = -1;
71  goto end;
72  }
73 
74  new_lineptr = (char*)realloc(*lineptr, needed);
75  if (new_lineptr == NULL) {
76  result = -1;
77  goto end;
78  }
79 
80  *lineptr = new_lineptr;
81  *n = needed;
82  }
83 
84  (*lineptr)[cur_len] = i;
85  cur_len++;
86 
 /* The newline is stored as part of the line and ends it. */
87  if (i == '\n')
88  break;
89  }
90  (*lineptr)[cur_len] = '\0';
 /* A partial last line (EOF after some characters) still counts as a line. */
91  result = cur_len ? (ssize_t)cur_len : result;
92 
93  end:
94  return result;
95  }
96 } // namespace
97 #endif
98 
/// Returns the current local time as a log-prefix string of the form
/// "YYYY-MM-DD HH:MM SS s <ms> ms", where <ms> is the sub-second part rounded
/// to the nearest millisecond.
///
/// The rounded millisecond value is always in [0, 999]: when rounding reaches
/// a full second it is carried into the seconds field instead of being printed
/// as "1000 ms". If the calendar conversion fails, the date part is left empty
/// and only the millisecond part is reported.
static std::string now() {
  struct timeval t;
  gettimeofday(&t, nullptr);

  // Round to the nearest millisecond and carry an overflow into the seconds,
  // so that e.g. 12 s + 999700 us is reported as "13 s 0 ms".
  time_t seconds = t.tv_sec;
  long millis = (static_cast<long>(t.tv_usec) + 500) / 1000;
  if (millis >= 1000) {
    millis -= 1000;
    ++seconds;
  }

  char stamp[256] = "";
  const struct tm* local = localtime(&seconds);
  // strftime() leaves the buffer unspecified when it returns 0, and
  // localtime() may return null: fall back to an empty date in both cases.
  if (local == nullptr || strftime(stamp, sizeof(stamp), "%F %R %S s", local) == 0) {
    stamp[0] = '\0';
  }

  char text[sizeof(stamp) + 32];
  snprintf(text, sizeof(text), "%s %ld ms", stamp, millis);
  return std::string(text);
}
112 
// Constructor initializer list and body; the signature line itself is not
// part of this listing (the code reads its configuration from `pset`).
//
// Steps performed:
//  1. copy the configuration parameters into the members;
//  2. create the token file if stat() cannot find it;
//  3. make sure the in-process, processed and corrupted directories exist;
//  4. record the current working directory in curDir_;
//  5. build the shell command (fileListCmd_) used to list the input files.
//
// Throws cms::Exception when the token file or a directory cannot be created,
// when one of the directory paths exists but is not a directory, when
// filePatterns is empty, or when getcwd() fails.
114  : inputDir_(pset.getParameter<std::string>("inputDir")),
115  filePatterns_(pset.getParameter<std::vector<std::string> >("filePatterns")),
116  inprocessDir_(pset.getParameter<std::string>("inprocessDir")),
117  processedDir_(pset.getParameter<std::string>("processedDir")),
118  corruptedDir_(pset.getParameter<std::string>("corruptedDir")),
119  tokenFile_(pset.getUntrackedParameter<std::string>("tokenFile", "watcherSourceToken")),
120  timeOut_(pset.getParameter<int>("timeOutInSec")),
121  end_(false),
122  verbosity_(pset.getUntrackedParameter<int>("verbosity", 0)) {
123  struct stat buf;
 // stat() returns non-zero when the token file cannot be stat'ed: create an empty one.
124  if (stat(tokenFile_.c_str(), &buf)) {
125  FILE* f = fopen(tokenFile_.c_str(), "w");
126  if (f) {
127  fclose(f);
128  } else {
129  throw cms::Exception("WatcherSource") << "Failed to create token file.";
130  }
131  }
 // Working directories that must exist before files can be moved into them.
132  vector<string> dirs;
133  dirs.push_back(inprocessDir_);
134  dirs.push_back(processedDir_);
135  dirs.push_back(corruptedDir_);
136 
137  for (unsigned i = 0; i < dirs.size(); ++i) {
138  const string& dir = dirs[i];
139  struct stat fileStat;
140  if (0 == stat(dir.c_str(), &fileStat)) {
141  if (!S_ISDIR(fileStat.st_mode)) {
142  throw cms::Exception("[WatcherSource]") << "File " << dir << " exists but is not a directory "
143  << " as expected.";
144  }
145  } else { //directory does not exist, let's try to create it
 // Only the last path component is created (plain mkdir, no parents).
146  if (0 != mkdir(dir.c_str(), 0755)) {
147  throw cms::Exception("[WatcherSource]") << "Failed to create directory " << dir << " for writing data.";
148  }
149  }
150  }
151 
 // Build the listing command: find | egrep '(pattern1|pattern2|...)' | sort.
152  std::stringstream fileListCmdBuf;
153  fileListCmdBuf.str("");
154  // fileListCmdBuf << "/bin/ls -rt " << inputDir_ << " | egrep '(";
155  //By default ls sorts the files alphabetically, which results
156  //in ordering the files by increasing LB number, which is the desired
157  //order.
158  // fileListCmdBuf << "/bin/ls " << inputDir_ << " | egrep '(";
159  fileListCmdBuf << "/bin/find " << inputDir_ << " -maxdepth 2 -print | egrep '(";
160  //TODO: validate patternDir (see ;, &&, ||) and escape special characters
 // NOTE(review): inputDir_ and the patterns are inserted into the shell command
 // verbatim, so shell metacharacters in the configuration reach the shell.
161  if (filePatterns_.empty())
 // NOTE(review): the category string below reads "WacherSource"; the other
 // exceptions in this body use "WatcherSource" -- looks like a typo, confirm.
162  throw cms::Exception("WacherSource", "filePatterns parameter is empty");
163  char curDir[PATH_MAX > 0 ? PATH_MAX : 4096];
164  if (getcwd(curDir, sizeof(curDir)) == nullptr) {
165  throw cms::Exception("WatcherSource") << "Failed to retreived working directory path: " << strerror(errno);
166  }
167  curDir_ = curDir;
168 
 // Join the patterns with '|' so that egrep treats them as alternatives.
169  for (unsigned i = 0; i < filePatterns_.size(); ++i) {
170  if (i > 0)
171  fileListCmdBuf << "|";
172  // if(filePatterns_[i].size()>0 && filePatterns_[0] != "/"){//relative path
173  // fileListCmdBuf << curDir << "/";
174  // }
175  fileListCmdBuf << filePatterns_[i];
176  }
177  fileListCmdBuf << ")' | sort";
178 
179  fileListCmd_ = fileListCmdBuf.str();
180 
181  cout << "[WatcherSource " << now() << "]"
182  << " Command to retrieve input files: " << fileListCmd_ << "\n";
183 }
184 
186 
// Tail of a member function whose opening lines (signature and the
// declaration of inputFile) are missing from this listing.
// Forwards to inputFile->newHeader(); yields false when inputFile is null.
189  return inputFile ? inputFile->newHeader() : false;
190 }
191 
194 
// Fragment: presumably the body of getHeader() -- its signature and the
// declaration of inputFile are missing from this listing; confirm against the
// original file.
// Returns the INIT message of the input file. Throws cms::Exception when
// inputFile is null or when the start message is not of type Header::INIT.
195  // No input file available: report it with an exception.
196  if (inputFile == nullptr) {
197  throw cms::Exception("WatcherSource") << "No input file found.";
198  }
199 
200  const InitMsgView* header = inputFile->startMessage();
201 
202  if (header->code() != Header::INIT) //INIT Msg
203  throw cms::Exception("readHeader", "WatcherStreamFileReader")
204  << "received wrong message type: expected INIT, got " << header->code() << "\n";
205 
206  return header;
207 }
208 
// Fragment: presumably the body of getNextEvent() -- its signature and the
// declaration of inputFile are missing from this listing; confirm against the
// original file.
// Returns the current record of the first input file whose next() call is
// non-zero, or nullptr when processing has ended or no input file is left.
 // Once end_ is set, close any file still open and report "no more events".
210  if (end_) {
211  closeFile();
212  return nullptr;
213  }
214 
216 
217  //Go to the next input file until one provides a new event: a file whose
 //next() returns 0 is closed and the following one is tried.
218  while ((inputFile = getInputFile()) != nullptr && inputFile->next() == 0) {
219  closeFile();
220  }
221 
222  return inputFile == nullptr ? nullptr : inputFile->currentRecord();
223 }
224 
226  char* lineptr = nullptr;
227  size_t n = 0;
228 
229  struct stat buf;
230 
231  if (stat(tokenFile_.c_str(), &buf) != 0) {
232  end_ = true;
233  }
234 
235  bool waiting = false;
236  static bool firstWait = true;
237  timeval waitStart;
238  //if no cached input file, look for new files until one is found:
239  if (!end_ && streamerInputFile_.get() == nullptr) {
240  fileName_.assign("");
241 
242  //check if we have file in the queue, if not look for new files:
243  while (filesInQueue_.empty()) {
244  if (stat(tokenFile_.c_str(), &buf) != 0) {
245  end_ = true;
246  break;
247  }
248  FILE* s = popen(fileListCmd_.c_str(), "r");
249  if (s == nullptr) {
250  throw cms::Exception("WatcherSource") << "Failed to retrieve list of input file: " << strerror(errno);
251  }
252 
253  ssize_t len;
254  while (!feof(s)) {
255  if ((len = getline(&lineptr, &n, s)) > 0) {
256  //remove end-of-line character:
257  lineptr[len - 1] = 0;
258  string fileName;
259  if (lineptr[0] != '/') {
260  if (!inputDir_.empty() && inputDir_[0] != '/') { //relative path
261  fileName.assign(curDir_);
262  fileName.append("/");
263  fileName.append(inputDir_);
264  } else {
265  fileName.assign(inputDir_);
266  }
267  fileName.append("/");
268  }
269  fileName.append(lineptr);
270  filesInQueue_.push_back(fileName);
271  if (verbosity_)
272  cout << "[WatcherSource " << now() << "]"
273  << " File to process: '" << fileName << "'\n";
274  }
275  }
276  while (!feof(s))
277  fgetc(s);
278  pclose(s);
279  if (filesInQueue_.empty()) {
280  if (!waiting) {
281  cout << "[WatcherSource " << now() << "]"
282  << " No file found. Waiting for new file...\n";
283  cout << flush;
284  waiting = true;
285  gettimeofday(&waitStart, nullptr);
286  } else if (!firstWait) {
287  timeval t;
288  gettimeofday(&t, nullptr);
289  float dt = (t.tv_sec - waitStart.tv_sec) * 1. + (t.tv_usec - waitStart.tv_usec) * 1.e-6;
290  if ((timeOut_ >= 0) && (dt > timeOut_)) {
291  cout << "[WatcherSource " << now() << "]"
292  << " Having waited for new file for " << (int)dt << " sec. "
293  << "Timeout exceeded. Exits.\n";
294  //remove(tokenFile_.c_str()); //we do not delete the token, otherwise sorting process on the monitoring farm will not be restarted by the runloop.sh script.
295  end_ = true;
296  break;
297  }
298  }
299  }
300  sleep(1);
301  } //end of file queue update
302  firstWait = false;
303  free(lineptr);
304  lineptr = nullptr;
305 
306  while (streamerInputFile_.get() == nullptr && !filesInQueue_.empty()) {
307  fileName_ = filesInQueue_.front();
308  filesInQueue_.pop_front();
309  int fd = open(fileName_.c_str(), 0);
310  if (fd != 0) {
311  struct stat buf;
312  off_t size = -1;
313  //check that file transfer is finished, by monitoring its size:
314  time_t t = time(nullptr);
315  for (;;) {
316  fstat(fd, &buf);
317  if (verbosity_)
318  cout << "file size: " << buf.st_size << ", prev size: " << size << "\n";
319  if (buf.st_size == size)
320  break;
321  else
322  size = buf.st_size;
323  if (difftime(t, buf.st_mtime) > 60)
324  break; //file older then 1 min=> tansfer must be finished
325  sleep(1);
326  }
327 
328  if (fd != 0 && buf.st_size == 0) { //file is empty. streamer reader
329  // does not like empty file=> skip it
330  stringstream c;
331  c << "/bin/mv -f \"" << fileName_ << "\" \"" << corruptedDir_ << "/.\"";
332  if (verbosity_)
333  cout << "[WatcherSource " << now() << "]"
334  << " Excuting " << c.str() << "\n";
335  int i = system(c.str().c_str());
336  if (i != 0) {
337  //throw cms::Exception("WatcherSource")
338  cout << "[WatcherSource " << now() << "] "
339  << "Failed to move empty file '" << fileName_ << "'"
340  << " to corrupted directory '" << corruptedDir_ << "'\n";
341  }
342  continue;
343  }
344 
345  close(fd);
346 
347  vector<char> buf1(fileName_.size() + 1);
348  copy(fileName_.begin(), fileName_.end(), buf1.begin());
349  buf1[buf1.size() - 1] = 0;
350 
351  vector<char> buf2(fileName_.size() + 1);
352  copy(fileName_.begin(), fileName_.end(), buf2.begin());
353  buf2[buf1.size() - 1] = 0;
354 
355  string dirnam(dirname(&buf1[0]));
356  string filenam(basename(&buf2[0]));
357 
358  string dest = inprocessDir_ + "/" + filenam;
359 
360  if (verbosity_)
361  cout << "[WatcherSource " << now() << "]"
362  << " Moving file " << fileName_ << " to " << dest << "\n";
363 
364  stringstream c;
365  c << "/bin/mv -f \"" << fileName_ << "\" \"" << dest << "/.\"";
366 
367  if (0 != rename(fileName_.c_str(), dest.c_str())) {
368  //if(0!=system(c.str().c_str())){
369  throw cms::Exception("WatcherSource")
370  << "Failed to move file '" << fileName_ << "' "
371  << "to processing directory " << inprocessDir_ << ": " << strerror(errno);
372  }
373 
374  fileName_ = dest;
375 
376  cout << "[WatcherSource " << now() << "]"
377  << " Opening file " << fileName_ << "\n"
378  << flush;
379  streamerInputFile_ = unique_ptr<edm::StreamerInputFile>(new edm::StreamerInputFile(fileName_));
380 
381  ofstream f(".watcherfile");
382  f << fileName_;
383  } else {
384  cout << "[WatcherSource " << now() << "]"
385  << " Failed to open file " << fileName_ << endl;
386  }
387  } //loop on file queue to find one file which opening succeeded
388  }
389  return streamerInputFile_.get();
390 }
391 
393  if (streamerInputFile_.get() == nullptr)
394  return;
395  //delete the streamer input file:
396  streamerInputFile_.reset();
397  stringstream cmd;
398  //TODO: validation of processDir
399  cmd << "/bin/mv -f \"" << fileName_ << "\" \"" << processedDir_ << "/.\"";
400  if (verbosity_)
401  cout << "[WatcherSource " << now() << "]"
402  << " Excuting " << cmd.str() << "\n";
403  int i = system(cmd.str().c_str());
404  if (i != 0) {
405  throw cms::Exception("WatcherSource") << "Failed to move processed file '" << fileName_ << "'"
406  << " to processed directory '" << processedDir_ << "'\n";
407  //Stop further processing to prevent endless loop:
408  end_ = true;
409  }
410  cout << flush;
411 }
std::vector< std::string > filePatterns_
size
Write out results.
float dt
Definition: AMPTWrapper.h:126
const EventMsgView * getNextEvent()
Definition: rename.py:1
def copy(args, dbName)
std::deque< std::string > filesInQueue_
#define NULL
Definition: scimark2.h:8
WatcherStreamFileReader(edm::ParameterSet const &pset)
const InitMsgView * getHeader()
#define SIZE_MAX
double f[11][100]
#define end
Definition: vmac.h:39
InitMsgView const * startMessage() const
edm::StreamerInputFile * getInputFile()
static std::string now()
std::unique_ptr< edm::StreamerInputFile > streamerInputFile_
#define SSIZE_MAX
def mkdir(path)
Definition: eostools.py:251
EventMsgView const * currentRecord() const
uint32 code() const
Definition: InitMessage.h:65
list cmd
Definition: mps_setup.py:244
dbl *** dir
Definition: mlp_gen.cc:35