14 import Alignment.MillePedeAlignmentAlgorithm.mpslib.Mpslibclass
as mpslib
19 lib = mpslib.jobdatabase()
23 command = [
"ls",
"-l", os.path.join(lib.mssDir,
"binaries")]
25 eoslsoutput = subprocess.check_output(command, stderr=subprocess.STDOUT).
split(
'\n')
26 except subprocess.CalledProcessError:
30 for i
in xrange(len(lib.JOBID)):
65 if 'DISABLED' in lib.JOBSTATUS[i]:
68 if 'FETCH' in lib.JOBSTATUS[i]:
71 stdOut =
'jobData/'+lib.JOBDIR[i]+
'/STDOUT' 73 if os.access(stdOut+
'.gz', os.R_OK):
74 os.system(
'gunzip '+stdOut+
'.gz')
77 with open(stdOut,
"r") as STDFILE: 83 if re.search(re.compile(
'Unable to access quota space',re.M|re.I), line):
85 if re.search(re.compile(
'Unable to get quota space',re.M|re.I), line):
87 if re.search(re.compile(
'Disk quota exceeded',re.M|re.I), line):
89 if re.search(re.compile(
'CERN report: Job Killed',re.M), line):
91 if re.search(re.compile(
'Job finished',re.M), line):
93 if re.search(re.compile(
'connection timed out',re.M), line):
95 if re.search(re.compile(
'ConfigFileReadError',re.M), line):
97 if re.search(re.compile(
'0 bytes transferred',re.M), line):
99 if re.search(re.compile(
'command not found',re.M), line):
102 if re.search(re.compile(
'stage_put: Insufficient user privileges',re.M), line):
108 match = re.search(re.compile(
'This process used .+?(\d+) KSI2K seconds',re.M|re.I), line)
111 cputime =
int(round(
int(match.group(1))/cpuFactor))
114 print 'gzip -f '+stdOut
115 os.system(
'gzip -f '+stdOut)
117 if e.args == (2,
"No such file or directory"):
118 print "mps_check.py cannot find", stdOut,
"to test" 124 log_file = os.path.join(
"jobData", lib.JOBDIR[i],
"HTCJOB")
125 condor_log = subprocess.check_output([
"condor_q", lib.JOBID[i],
126 "-userlog", log_file,
131 stderr = subprocess.STDOUT)
132 condor_log = condor_log.split()
134 cputime =
int(round(
float(condor_log[0])))
136 if condor_log[1] ==
"3":
138 kill_reason =
" ".
join(condor_log[2:])
140 except subprocess.CalledProcessError
as e:
145 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/cmsRun.out' 146 if os.access(eazeLog, os.R_OK):
148 with open(eazeLog,
"r") as INFILE: 152 if re.search(re.compile(
'<StorageStatistics>',re.M), line):
154 if re.search(re.compile(
'Time limit reached\.',re.M), line):
156 if re.search(re.compile(
'gives I\/O problem',re.M), line):
158 if re.search(re.compile(
'FrameworkError ExitStatus=[\'\"]8001[\'\"]',re.M), line):
160 if re.search(re.compile(
'too many tracks',re.M), line):
162 if re.search(re.compile(
'segmentation violation',re.M), line):
164 if re.search(re.compile(
'failed RFIO error',re.M), line):
166 if re.search(re.compile(
'Request exceeds quota',re.M), line):
170 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/alignment.log' 173 if os.access(eazeLog+
'.gz', os.R_OK):
174 os.system(
'gunzip '+eazeLog+
'.gz')
177 if os.access(eazeLog, os.R_OK):
179 with open(eazeLog,
'r') as INFILE: 183 if re.search(re.compile(
'<StorageStatistics>',re.M), line):
185 if re.search(re.compile(
'EAZE\. Time limit reached\.',re.M), line):
187 if re.search(re.compile(
'GAF gives I\/O problem',re.M), line):
189 if re.search(re.compile(
'FrameworkError ExitStatus=[\'\"]8001[\'\"]',re.M), line):
191 if re.search(re.compile(
'too many tracks',re.M), line):
193 if re.search(re.compile(
'segmentation violation',re.M), line):
195 if re.search(re.compile(
'failed RFIO error',re.M), line):
197 if re.search(re.compile(
'Request exceeds quota',re.M), line):
200 if re.search(re.compile(
'Fatal Exception',re.M), line):
202 if re.search(re.compile(
'Exception caught in cmsRun',re.M), line):
205 if re.search(re.compile(
'AlignmentProducerAsAnalyzer::endJob\(\)',re.M), line):
207 if re.search(re.compile(
'FwkReport -i main_input:sourc',re.M), line):
209 nEvent =
int(array[5])
210 if nEvent==0
and re.search(re.compile(
'FwkReport -i PostSource',re.M), line):
212 nEvent =
int(array[5])
214 if nEvent==0
and re.search(re.compile(
'FwkReport -i AfterSource',re.M), line):
216 nEvent =
int(array[5])
218 if logZipped ==
'true':
219 os.system(
'gzip '+eazeLog)
222 print 'mps_check.py cannot find',eazeLog,
'to test' 228 milleOut =
'milleBinary%03d.dat' % (i+1)
234 for line
in eoslsoutput:
236 columns = line.split()
237 mOutSize = columns[4]
244 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/pede.dump' 245 if os.access(eazeLog+
'.gz', os.R_OK):
247 os.system(
'rm -f /tmp/pede.dump')
248 os.system(
'gunzip -c '+eazeLog+
'.gz > /tmp/pede.dump')
249 eazeLog =
'/tmp/pede.dump' 250 if os.access(eazeLog, os.R_OK):
251 with open(eazeLog,
"r") as INFILE: # open pede.dump 257 if re.search(re.compile(
'Millepede II.* ending',re.M), line):
260 match = re.search(re.compile(
'Peak dynamic memory allocation: (.+) GB',re.I), line)
263 mem = re.sub(
'\s',
'', mem)
265 if re.search(re.compile(
'^\d+\.\d+$',re.M), mem):
266 usedPedeMem =
float(mem)
268 print 'mps_check.py: Found Pede peak memory allocation but extracted number is not a float:',mem
272 if lib.pedeMem > 0
and usedPedeMem > 0.:
273 memoryratio = usedPedeMem /(lib.pedeMem/1024.)
276 if lib.pedeMem > 4000
and memoryratio < 0.75 :
277 msg = (
"Warning: {0:.2f} GB of memory for Pede " 278 "requested, but only {1:.1f}% of it has been " 279 "used! Consider to request less memory in order " 280 "to save resources.")
281 print msg.format(lib.pedeMem/1024.0, memoryratio*100)
282 elif memoryratio > 1 :
283 msg = (
"Warning: {0:.2f} GB of memory for Pede " 284 "requested, but {1:.1f}% of this has been " 285 "used! Consider to request more memory to avoid " 286 "premature removal of the job by the admin.")
287 print msg.format(lib.pedeMem/1024.0, memoryratio*100)
289 msg = (
"Info: Used {0:.1f}% of {1:.2f} GB of memory " 290 "which has been requested for Pede.")
291 print msg.format(memoryratio*100, lib.pedeMem/1024.0)
295 if eazeLog ==
'/tmp/pede.dump':
296 os.system(
'rm /tmp/pede.dump')
300 print 'mps_check.py cannot find',eazeLog,
'to test' 303 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/millepede.log' 305 if os.access(eazeLog+
'.gz', os.R_OK):
306 os.system(
'gunzip '+eazeLog+
'.gz')
309 if os.access(eazeLog, os.R_OK):
311 with open(eazeLog,
"r") as INFILE: 315 if re.search(re.compile(
'step no descending',re.M), line):
317 pedeLogErrStr += line
318 if re.search(re.compile(
'Constraint equation discrepancies:',re.M), line):
320 pedeLogErrStr += line
322 if re.search(re.compile(
'insufficient constraint equations',re.M), line):
324 pedeLogWrnStr += line
326 if logZipped ==
'true':
327 os.system(
'gzip '+eazeLog)
329 print 'mps_check.py cannot find',eazeLog,
'to test' 333 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/millepede.end' 335 if os.access(eazeLog+
'.gz', os.R_OK):
# Decompress millepede.end.gz in place before it is read. The space after
# 'gunzip' is required: without it the shell is handed a single word such as
# "gunzipjobData/<dir>/millepede.end.gz" as the command name, the call fails,
# and the file is never unpacked. This matches the other gunzip calls in this
# script, which all use 'gunzip ' with the trailing space.
os.system('gunzip ' + eazeLog + '.gz')
339 if os.access(eazeLog, os.R_OK):
341 with open(eazeLog,
"r") as INFILE: 346 match = re.search(re.compile(
'([-+]?\d+)',re.M), line)
348 if int(match.group(1)) == 1:
350 pedeLogWrnStr += line
351 elif int(match.group(1)) != 0:
353 pedeLogErrStr += line
354 if logZipped ==
'true':
355 os.system(
'gzip '+eazeLog)
357 print 'mps_check.py cannot find',eazeLog,
'to test' 365 print lib.JOBDIR[i],lib.JOBID[i],
'did not reach end of file' 368 print lib.JOBDIR[i],lib.JOBID[i],
'had quota space problem' 370 remark =
'eos quota space problem' 372 print lib.JOBDIR[i],lib.JOBID[i],
'had I/O problem' 375 print lib.JOBDIR[i],lib.JOBID[i],
'had Framework error 8001 problem' 376 remark =
'fwk error 8001' 379 print lib.JOBDIR[i],lib.JOBID[i],
'had connection timed out problem' 380 remark =
'connection timed out' 382 print lib.JOBDIR[i],lib.JOBID[i],
'had config file error' 383 remark =
'cfg file error' 386 guess =
" (probably time exceeded)" if kill_reason
is None else ":" 387 print lib.JOBDIR[i], lib.JOBID[i],
"Job killed" + guess
388 if kill_reason
is not None:
print kill_reason
392 print lib.JOBDIR[i],lib.JOBID[i],
'ran into time limit' 394 if tooManyTracks == 1:
395 print lib.JOBDIR[i],lib.JOBID[i],
'too many tracks' 397 print lib.JOBDIR[i],lib.JOBID[i],
'SEGVIOL encountered' 401 print lib.JOBDIR[i],lib.JOBID[i],
'RFIO error encountered' 402 remark =
'rfio error' 405 print lib.JOBDIR[i],lib.JOBID[i],
'Request exceeds quota' 406 if exceptionCaught == 1:
407 print lib.JOBDIR[i],lib.JOBID[i],
'Exception caught in cmsrun' 408 remark =
'Exception caught' 411 print 'milleBinary???.dat file not found or empty' 412 remark =
'empty milleBinary' 413 if emptyDatOnFarm > 0:
414 print '...but already empty on farm so OK (or check job',i+1,
'yourself...)' 418 print lib.JOBDIR[i],lib.JOBID[i],
'Command not found' 419 remark =
'cmd not found' 422 print lib.JOBDIR[i],lib.JOBID[i],
'Insufficient privileges to rfcp files' 423 remark =
'Could not rfcp files' 426 print lib.JOBDIR[i],lib.JOBID[i],
'Pede did not end normally' 427 remark =
'pede failed' 430 print lib.JOBDIR[i],lib.JOBID[i],
'Problems in running Pede:' 432 remark =
'pede error' 436 print lib.JOBDIR[i],lib.JOBID[i],
'Warnings in running Pede:' 438 remark =
'pede warnings' 441 print lib.JOBDIR[i],lib.JOBID[i],
'Job not ended' 442 remark =
'job not ended' 447 print lib.JOBDIR[i],lib.JOBID[i],
' -------- ',okStatus
450 lib.JOBNEVT[i] = nEvent
452 lib.JOBSTATUS[i] = disabled+okStatus
454 lib.JOBRUNTIME[i] = cputime
456 lib.JOBREMARK[i] = remark
static std::string join(char **cmd)