CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
WatcherStreamFileReader.cc
Go to the documentation of this file.
5 
6 #include <errno.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <cstring>
11 #include <unistd.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <sys/time.h>
15 #include <fcntl.h>
16 #include <libgen.h>
17 #include <fstream>
18 
19 //using namespace edm;
20 using namespace std;
21 
23 
24 
25 #if !defined(__linux__) && !(defined(__APPLE__) && __DARWIN_C_LEVEL >= 200809L)
26 /* getline implementation is copied from glibc. */
27 
28 #ifndef SIZE_MAX
29 # define SIZE_MAX ((size_t) -1)
30 #endif
31 #ifndef SSIZE_MAX
32 # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
33 #endif
34 namespace {
/*
 * Fallback getline() for platforms that do not provide one (see the
 * enclosing #if). Reads characters from fp with getc() until a newline
 * has been stored or EOF is reached, placing them in the buffer *lineptr
 * and growing that buffer with realloc() when needed.
 *
 * lineptr: address of the line buffer pointer. If *lineptr is NULL or
 *          *n is 0, a 120-byte buffer is allocated with malloc() here.
 *          The buffer pointer may be replaced by realloc().
 * n:       address of the buffer capacity in bytes; updated whenever
 *          the buffer is allocated or grown.
 * fp:      stream to read from.
 *
 * Returns the number of characters stored, counting the newline if one
 * was read and not counting the terminating NUL. Returns -1 when no
 * character could be read before EOF, when an argument is NULL (errno
 * is then set to EINVAL), or when the buffer could not be allocated or
 * grown.
 *
 * NOTE: the allocation-failure and size-limit paths jump straight to
 * 'end', so on those paths the buffer is not NUL-terminated.
 */
35 ssize_t getline (char **lineptr, size_t *n, FILE *fp)
36 {
37  ssize_t result = -1;
38  size_t cur_len = 0;
39 
  /* Reject NULL arguments before anything is dereferenced. */
40  if (lineptr == NULL || n == NULL || fp == NULL)
41  {
42  errno = EINVAL;
43  return -1;
44  }
45 
  /* No usable buffer supplied by the caller: start with 120 bytes. */
46  if (*lineptr == NULL || *n == 0)
47  {
48  *n = 120;
49  *lineptr = (char *) malloc (*n);
50  if (*lineptr == NULL)
51  {
52  result = -1;
53  goto end;
54  }
55  }
56 
57  for (;;)
58  {
59  int i;
60 
61  i = getc (fp);
  /* EOF (or read error): leave the loop. The code after the loop turns
     result into the stored length if some characters were read. */
62  if (i == EOF)
63  {
64  result = -1;
65  break;
66  }
67 
68  /* Make enough space for len+1 (for final NUL) bytes. */
69  if (cur_len + 1 >= *n)
70  {
  /* Largest size the buffer may reach: the returned length has to
     fit in an ssize_t. */
71  size_t needed_max =
72  SSIZE_MAX < SIZE_MAX ? (size_t) SSIZE_MAX + 1 : SIZE_MAX;
73  size_t needed = 2 * *n + 1; /* Be generous. */
74  char *new_lineptr;
75 
76  if (needed_max < needed)
77  needed = needed_max;
  /* Even the clamped size cannot hold one more character plus the
     NUL: give up. */
78  if (cur_len + 1 >= needed)
79  {
80  result = -1;
81  goto end;
82  }
83 
84  new_lineptr = (char *) realloc (*lineptr, needed);
  /* On failure *lineptr and *n are left untouched, so the caller
     still owns the old buffer. */
85  if (new_lineptr == NULL)
86  {
87  result = -1;
88  goto end;
89  }
90 
91  *lineptr = new_lineptr;
92  *n = needed;
93  }
94 
95  (*lineptr)[cur_len] = i;
96  cur_len++;
97 
  /* The newline is stored as part of the line and ends it. */
98  if (i == '\n')
99  break;
100  }
101  (*lineptr)[cur_len] = '\0';
  /* A partial last line (EOF without newline) still counts as a line;
     only an empty read keeps the -1 set above. */
102  result = cur_len ? (ssize_t) cur_len : result;
103 
104 end:
105  return result;
106 }
107 }
108 #endif
109 
/**
 * Builds the time stamp used as prefix of the log messages of this file.
 *
 * The result has the form "YYYY-MM-DD HH:MM SS s <ms> ms", in local time,
 * where <ms> is the millisecond part (0..999) of the current second.
 * If the local time cannot be computed or formatted, the date part is
 * left empty and only " <ms> ms" is returned.
 *
 * @return the formatted current time.
 */
static std::string now(){
  struct timeval t;
  gettimeofday(&t, 0);

  // localtime_r fills a caller-owned struct instead of the static storage
  // shared by every localtime() call, and its failure can be detected.
  const time_t secs = t.tv_sec;
  struct tm parts;

  char buf[256];
  buf[0] = 0;
  if(localtime_r(&secs, &parts) != 0){
    // strftime returns 0 when the result does not fit; the buffer content
    // is then unspecified, so fall back to an empty date part.
    if(strftime(buf, sizeof(buf), "%F %R %S s", &parts) == 0){
      buf[0] = 0;
    }
  }

  // Truncate rather than round: rounding produced "1000 ms" for the last
  // half millisecond of a second without incrementing the seconds field.
  std::stringstream stamp;
  stamp << buf << " " << (t.tv_usec / 1000) << " ms";

  return stamp.str();
}
123 
125  inputDir_(pset.getParameter<std::string>("inputDir")),
126  filePatterns_(pset.getParameter<std::vector<std::string> >("filePatterns")),
127  inprocessDir_(pset.getParameter<std::string>("inprocessDir")),
128  processedDir_(pset.getParameter<std::string>("processedDir")),
129  corruptedDir_(pset.getParameter<std::string>("corruptedDir")),
130  tokenFile_(pset.getUntrackedParameter<std::string>("tokenFile",
131  "watcherSourceToken")),
132  timeOut_(pset.getParameter<int>("timeOutInSec")),
133  end_(false),
134  verbosity_(pset.getUntrackedParameter<int>("verbosity", 0)){
135  struct stat buf;
136  if(stat(tokenFile_.c_str(), &buf)){
137  FILE* f = fopen(tokenFile_.c_str(), "w");
138  if(f){
139  fclose(f);
140  } else{
141  throw cms::Exception("WatcherSource") << "Failed to create token file.";
142  }
143  }
144  vector<string> dirs;
145  dirs.push_back(inprocessDir_);
146  dirs.push_back(processedDir_);
147  dirs.push_back(corruptedDir_);
148 
149  for(unsigned i = 0; i < dirs.size(); ++i){
150  const string& dir = dirs[i];
151  struct stat fileStat;
152  if(0==stat(dir.c_str(), &fileStat)){
153  if(!S_ISDIR(fileStat.st_mode)){
154  throw cms::Exception("[WatcherSource]")
155  << "File " << dir << " exists but is not a directory "
156  << " as expected.";
157  }
158  } else {//directory does not exists, let's try to create it
159  if(0!=mkdir(dir.c_str(), 0755)){
160  throw cms::Exception("[WatcherSource]")
161  << "Failed to create directory " << dir
162  << " for writing data.";
163  }
164  }
165  }
166 }
167 
169 }
170 
173  return inputFile?inputFile->newHeader():false;
174 }
175 
177 
179 
180  //no input file available: report it with an exception
181  if(inputFile==0){
182  throw cms::Exception("WatcherSource") << "No input file found.";
183  }
184 
185  const InitMsgView* header = inputFile->startMessage();
186 
187  if(header->code() != Header::INIT) //INIT Msg
188  throw cms::Exception("readHeader","WatcherStreamFileReader")
189  << "received wrong message type: expected INIT, got "
190  << header->code() << "\n";
191 
192  return header;
193 }
194 
196  if(end_){ closeFile(); return 0;}
197 
199 
200  //move on to the next input file while the current one yields no new event (next() returns 0); stop when an event is found or no file is left
201  while((inputFile=getInputFile())!=0
202  && inputFile->next()==0){
203  closeFile();
204  }
205 
206  return inputFile==0?0:inputFile->currentRecord();
207 }
208 
210  char* lineptr = 0;
211  size_t n = 0;
212  static stringstream cmd;
213  static bool cmdSet = false;
214  static char curDir[PATH_MAX>0?PATH_MAX:4096];
215 
216  if(!cmdSet){
217  cmd.str("");
218  cmd << "/bin/ls -rt " << inputDir_ << " | egrep '(";
219  //TODO: validate patternDir (see ;, &&, ||) and escape special character
220  if(filePatterns_.size()==0) return 0;
221  if(getcwd(curDir, sizeof(curDir))==0){
222  throw cms::Exception("WatcherSource")
223  << "Failed to retreived working directory path: "
224  << strerror(errno);
225  }
226 
227  for(unsigned i = 0 ; i < filePatterns_.size(); ++i){
228  if(i>0) cmd << "|";
229  // if(filePatterns_[i].size()>0 && filePatterns_[0] != "/"){//relative path
230  // cmd << curDir << "/";
231  // }
232  cmd << filePatterns_[i];
233  }
234  cmd << ")'";
235 
236  cout << "[WatcherSource " << now() << "]"
237  << " Command to retrieve input files: "
238  << cmd.str() << "\n";
239  cmdSet = true;
240  }
241 
242  struct stat buf;
243 
244  if(stat(tokenFile_.c_str(), &buf)!=0){
245  end_ = true;
246  }
247 
248  bool waiting = false;
249  static bool firstWait = true;
250  timeval waitStart;
251  //if no cached input file, look for new files until one is found:
252  if(!end_ && streamerInputFile_.get()==0){
253  fileName_.assign("");
254 
255  //check if we have file in the queue, if not look for new files:
256  while(filesInQueue_.size()==0){
257  if(stat(tokenFile_.c_str(), &buf)!=0){
258  end_ = true;
259  break;
260  }
261  FILE* s = popen(cmd.str().c_str(), "r");
262  if(s==0){
263  throw cms::Exception("WatcherSource")
264  << "Failed to retrieve list of input file: " << strerror(errno);
265  }
266 
267  ssize_t len;
268  while(!feof(s)){
269  if((len=getline(&lineptr, &n, s))>0){
270  //remove end-of-line character:
271  lineptr[len-1] = 0;
272  string fileName;
273  if(inputDir_.size()>0 && inputDir_[0] != '/'){//relative path
274  fileName.assign(curDir);
275  fileName.append("/");
276  fileName.append(inputDir_);
277  } else{
278  fileName.assign(inputDir_);
279  }
280  fileName.append("/");
281  fileName.append(lineptr);
282  filesInQueue_.push_back(fileName);
283  if(verbosity_) cout << "[WatcherSource " << now() << "]"
284  << " File to process: '"
285  << fileName << "'\n";
286  }
287  }
288  while(!feof(s)) fgetc(s);
289  pclose(s);
290  if(filesInQueue_.size()==0){
291  if(!waiting){
292  cout << "[WatcherSource " << now() << "]"
293  << " No file found. Waiting for new file...\n";
294  cout << flush;
295  waiting = true;
296  gettimeofday(&waitStart, 0);
297  } else if(!firstWait){
298  timeval t;
299  gettimeofday(&t, 0);
300  float dt = (t.tv_sec-waitStart.tv_sec) * 1.
301  + (t.tv_usec-waitStart.tv_usec) * 1.e-6;
302  if((timeOut_ >= 0) && (dt > timeOut_)){
303  cout << "[WatcherSource " << now() << "]"
304  << " Having waited for new file for " << (int)dt << " sec. "
305  << "Timeout exceeded. Exits.\n";
306  //remove(tokenFile_.c_str()); //we do not delete the token, otherwise sorting process on the monitoring farm will not be restarted by the runloop.sh script.
307  end_ = true;
308  break;
309  }
310  }
311  }
312  sleep(1);
313  } //end of file queue update
314  firstWait = false;
315  free(lineptr); lineptr=0;
316 
317  while(streamerInputFile_.get()==0 && !filesInQueue_.empty()){
318 
319  fileName_ = filesInQueue_.front();
320  filesInQueue_.pop_front();
321  int fd = open(fileName_.c_str(), 0);
322  if(fd!=0){
323  struct stat buf;
324  off_t size = -1;
325  //check that file transfer is finished, by monitoring its size:
326  time_t t = time(0);
327  for(;;){
328  fstat(fd, &buf);
329  if(verbosity_) cout << "file size: " << buf.st_size << ", prev size: " << size << "\n";
330  if(buf.st_size==size) break; else size = buf.st_size;
331  if(difftime(t,buf.st_mtime)>60) break; //file older then 1 min=> tansfer must be finished
332  sleep(1);
333  }
334 
335  if(fd!=0 && buf.st_size == 0){//file is empty. streamer reader
336  // does not like empty file=> skip it
337  stringstream c;
338  c << "/bin/mv -f \"" << fileName_ << "\" \"" << corruptedDir_
339  << "/.\"";
340  if(verbosity_) cout << "[WatcherSource " << now() << "]"
341  << " Excuting "
342  << c.str() << "\n";
343  int i = system(c.str().c_str());
344  if(i!=0){
345  //throw cms::Exception("WatcherSource")
346  cout << "[WatcherSource " << now() << "] "
347  << "Failed to move empty file '" << fileName_ << "'"
348  << " to corrupted directory '" << corruptedDir_ << "'\n";
349  }
350  continue;
351  }
352 
353  close(fd);
354 
355  vector<char> buf1(fileName_.size()+1);
356  copy(fileName_.begin(), fileName_.end(), buf1.begin());
357  buf1[buf1.size()-1] = 0;
358 
359  vector<char> buf2(fileName_.size()+1);
360  copy(fileName_.begin(), fileName_.end(), buf2.begin());
361  buf2[buf1.size()-1] = 0;
362 
363  string dirnam(dirname(&buf1[0]));
364  string filenam(basename(&buf2[0]));
365 
366  string dest = inprocessDir_ + "/" + filenam;
367 
368  if(verbosity_) cout << "[WatcherSource " << now() << "]"
369  << " Moving file "
370  << fileName_ << " to " << dest << "\n";
371 
372  if(0!=rename(fileName_.c_str(), dest.c_str())){
373  throw cms::Exception("WatcherSource")
374  << "Failed to move file '" << fileName_ << "' "
375  << "to processing directory " << inprocessDir_
376  << ": " << strerror(errno);
377  }
378 
379  fileName_ = dest;
380 
381  cout << "[WatcherSource " << now() << "]"
382  << " Opening file " << fileName_ << "\n" << flush;
384  = auto_ptr<edm::StreamerInputFile>(new edm::StreamerInputFile(fileName_));
385 
386  ofstream f(".watcherfile");
387  f << fileName_;
388  } else{
389  cout << "[WatcherSource " << now() << "]"
390  << " Failed to open file " << fileName_ << endl;
391  }
392  } //loop on file queue to find one file which opening succeeded
393  }
394  return streamerInputFile_.get();
395 }
396 
398  if(streamerInputFile_.get()==0) return;
399  //delete the streamer input file:
400  streamerInputFile_.reset();
401  stringstream cmd;
402  //TODO: validation of processDir
403  cmd << "/bin/mv -f \"" << fileName_ << "\" \"" << processedDir_ << "/.\"";
404  if(verbosity_) cout << "[WatcherSource " << now() << "]"
405  << " Excuting " << cmd.str() << "\n";
406  int i = system(cmd.str().c_str());
407  if(i!=0){
408  throw cms::Exception("WatcherSource")
409  << "Failed to move processed file '" << fileName_ << "'"
410  << " to processed directory '" << processedDir_ << "'\n";
411  //Stop further processing to prevent endless loop:
412  end_ = true;
413  }
414  cout << flush;
415 }
std::vector< std::string > filePatterns_
float dt
Definition: AMPTWrapper.h:126
int i
Definition: DBlmapReader.cc:9
const EventMsgView * getNextEvent()
std::deque< std::string > filesInQueue_
#define NULL
Definition: scimark2.h:8
WatcherStreamFileReader(edm::ParameterSet const &pset)
const InitMsgView * getHeader()
std::auto_ptr< edm::StreamerInputFile > streamerInputFile_
#define SIZE_MAX
tuple result
Definition: query.py:137
string cmd
Definition: asciidump.py:19
double f[11][100]
#define end
Definition: vmac.h:37
InitMsgView const * startMessage() const
edm::StreamerInputFile * getInputFile()
static std::string now()
#define SSIZE_MAX
EventMsgView const * currentRecord() const
uint32 code() const
Definition: InitMessage.h:70
tuple cout
Definition: gather_cfg.py:121
dbl *** dir
Definition: mlp_gen.cc:35
volatile std::atomic< bool > shutdown_flag false
tuple size
Write out results.