14 import Alignment.MillePedeAlignmentAlgorithm.mpslib.Mpslibclass
as mpslib
19 lib = mpslib.jobdatabase()
# Location of the eos.select executable that is invoked below.
eos = '/afs/cern.ch/project/eos/installation/cms/bin/eos.select'
# Shell command line: "<eos> ls -l <lib.mssDir>".
command = ' '.join((eos, 'ls -l', lib.mssDir))
# Run the command through the shell with stderr folded into stdout and keep
# the captured text as a list of lines; the per-job loop scans this list.
eoslsoutput = subprocess.check_output(
    command,
    shell=True,
    stderr=subprocess.STDOUT,
).split('\n')
30 for i
in xrange(len(lib.JOBID)):
64 if 'DISABLED' in lib.JOBSTATUS[i]:
67 if 'FETCH' in lib.JOBSTATUS[i]:
70 stdOut =
'jobData/'+lib.JOBDIR[i]+
'/STDOUT' 72 if os.access(stdOut+
'.gz', os.R_OK):
73 os.system(
'gunzip '+stdOut+
'.gz')
76 with open(stdOut,
"r") as STDFILE: 82 if re.search(re.compile(
'Unable to access quota space',re.M|re.I), line):
84 if re.search(re.compile(
'Unable to get quota space',re.M|re.I), line):
86 if re.search(re.compile(
'Disk quota exceeded',re.M|re.I), line):
88 if re.search(re.compile(
'CERN report: Job Killed',re.M), line):
90 if re.search(re.compile(
'Job finished',re.M), line):
92 if re.search(re.compile(
'connection timed out',re.M), line):
94 if re.search(re.compile(
'ConfigFileReadError',re.M), line):
96 if re.search(re.compile(
'0 bytes transferred',re.M), line):
98 if re.search(re.compile(
'command not found',re.M), line):
101 if re.search(re.compile(
'stage_put: Insufficient user privileges',re.M), line):
107 match = re.search(re.compile(
'This process used .+?(\d+) KSI2K seconds',re.M|re.I), line)
110 cputime =
int(round(
int(match.group(1))/cpuFactor))
113 print 'gzip -f '+stdOut
114 os.system(
'gzip -f '+stdOut)
116 if e.args == (2,
"No such file or directory"):
117 print "mps_check.py cannot find", stdOut,
"to test" 122 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/cmsRun.out' 123 if os.access(eazeLog, os.R_OK):
125 with open(eazeLog,
"r") as INFILE: 129 if re.search(re.compile(
'<StorageStatistics>',re.M), line):
131 if re.search(re.compile(
'Time limit reached\.',re.M), line):
133 if re.search(re.compile(
'gives I\/O problem',re.M), line):
135 if re.search(re.compile(
'FrameworkError ExitStatus=[\'\"]8001[\'\"]',re.M), line):
137 if re.search(re.compile(
'too many tracks',re.M), line):
139 if re.search(re.compile(
'segmentation violation',re.M), line):
141 if re.search(re.compile(
'failed RFIO error',re.M), line):
143 if re.search(re.compile(
'Request exceeds quota',re.M), line):
147 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/alignment.log' 150 if os.access(eazeLog+
'.gz', os.R_OK):
151 os.system(
'gunzip '+eazeLog+
'.gz')
154 if os.access(eazeLog, os.R_OK):
156 with open(eazeLog,
'r') as INFILE: 160 if re.search(re.compile(
'<StorageStatistics>',re.M), line):
162 if re.search(re.compile(
'EAZE\. Time limit reached\.',re.M), line):
164 if re.search(re.compile(
'GAF gives I\/O problem',re.M), line):
166 if re.search(re.compile(
'FrameworkError ExitStatus=[\'\"]8001[\'\"]',re.M), line):
168 if re.search(re.compile(
'too many tracks',re.M), line):
170 if re.search(re.compile(
'segmentation violation',re.M), line):
172 if re.search(re.compile(
'failed RFIO error',re.M), line):
174 if re.search(re.compile(
'Request exceeds quota',re.M), line):
177 if re.search(re.compile(
'Fatal Exception',re.M), line):
179 if re.search(re.compile(
'Exception caught in cmsRun',re.M), line):
182 if re.search(re.compile(
'AlignmentProducerAsAnalyzer::endJob\(\)',re.M), line):
184 if re.search(re.compile(
'FwkReport -i main_input:sourc',re.M), line):
186 nEvent =
int(array[5])
187 if nEvent==0
and re.search(re.compile(
'FwkReport -i PostSource',re.M), line):
189 nEvent =
int(array[5])
191 if nEvent==0
and re.search(re.compile(
'FwkReport -i AfterSource',re.M), line):
193 nEvent =
int(array[5])
195 if logZipped ==
'true':
196 os.system(
'gzip '+eazeLog)
199 print 'mps_check.py cannot find',eazeLog,
'to test' 205 milleOut =
'milleBinary%03d.dat' % (i+1)
211 for line
in eoslsoutput:
213 columns = line.split()
214 mOutSize = columns[4]
221 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/pede.dump' 222 if os.access(eazeLog+
'.gz', os.R_OK):
224 os.system(
'rm -f /tmp/pede.dump')
225 os.system(
'gunzip -c '+eazeLog+
'.gz > /tmp/pede.dump')
226 eazeLog =
'/tmp/pede.dump' 227 if os.access(eazeLog, os.R_OK):
228 with open(eazeLog,
"r") as INFILE: # open pede.dump 234 if re.search(re.compile(
'Millepede II.* ending',re.M), line):
237 match = re.search(re.compile(
'Peak dynamic memory allocation: (.+) GB',re.I), line)
240 mem = re.sub(
'\s',
'', mem)
242 if re.search(re.compile(
'^\d+\.\d+$',re.M), mem):
243 usedPedeMem =
float(mem)
245 print 'mps_check.py: Found Pede peak memory allocation but extracted number is not a float:',mem
249 if lib.pedeMem > 0
and usedPedeMem > 0.:
250 memoryratio = usedPedeMem /(lib.pedeMem/1024.)
253 if lib.pedeMem > 4000
and memoryratio < 0.75 :
254 msg = (
"Warning: {0:.2f} GB of memory for Pede " 255 "requested, but only {1:.1f}% of it has been " 256 "used! Consider to request less memory in order " 257 "to save resources.").
format(lib.pedeMem/1024.0,
261 msg = (
"Info: Used {0:.1f}% of {1:.2f} GB of memory " 262 "which has been requested for Pede.")
263 print msg.format(memoryratio*100, lib.pedeMem/1024.0)
267 if eazeLog ==
'/tmp/pede.dump':
268 os.system(
'rm /tmp/pede.dump')
272 print 'mps_check.py cannot find',eazeLog,
'to test' 275 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/millepede.log' 277 if os.access(eazeLog+
'.gz', os.R_OK):
278 os.system(
'gunzip '+eazeLog+
'.gz')
281 if os.access(eazeLog, os.R_OK):
283 with open(eazeLog,
"r") as INFILE: 287 if re.search(re.compile(
'step no descending',re.M), line):
289 pedeLogErrStr += line
290 if re.search(re.compile(
'Constraint equation discrepancies:',re.M), line):
292 pedeLogErrStr += line
294 if re.search(re.compile(
'insufficient constraint equations',re.M), line):
296 pedeLogWrnStr += line
298 if logZipped ==
'true':
299 os.system(
'gzip '+eazeLog)
301 print 'mps_check.py cannot find',eazeLog,
'to test' 305 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/millepede.end' 307 if os.access(eazeLog+
'.gz', os.R_OK):
# Unpack the gzipped file named by eazeLog in place. The space after
# 'gunzip' is required: without it the shell receives a single word
# ("gunzip<path>.gz"), the command is not found and the file stays zipped.
os.system('gunzip ' + eazeLog + '.gz')
311 if os.access(eazeLog, os.R_OK):
313 with open(eazeLog,
"r") as INFILE: 318 match = re.search(re.compile(
'([-+]?\d+)',re.M), line)
320 if int(match.group(1)) == 1:
322 pedeLogWrnStr += line
323 elif int(match.group(1)) != 0:
325 pedeLogErrStr += line
326 if logZipped ==
'true':
327 os.system(
'gzip '+eazeLog)
329 print 'mps_check.py cannot find',eazeLog,
'to test' 337 print lib.JOBDIR[i],lib.JOBID[i],
'did not reach end of file' 340 print lib.JOBDIR[i],lib.JOBID[i],
'had quota space problem' 342 remark =
'eos quota space problem' 344 print lib.JOBDIR[i],lib.JOBID[i],
'had I/O problem' 347 print lib.JOBDIR[i],lib.JOBID[i],
'had Framework error 8001 problem' 348 remark =
'fwk error 8001' 351 print lib.JOBDIR[i],lib.JOBID[i],
'had connection timed out problem' 352 remark =
'connection timed out' 354 print lib.JOBDIR[i],lib.JOBID[i],
'had config file error' 355 remark =
'cfg file error' 358 print lib.JOBDIR[i],lib.JOBID[i],
'Job Killed (probably time exceeded)' 362 print lib.JOBDIR[i],lib.JOBID[i],
'ran into time limit' 364 if tooManyTracks == 1:
365 print lib.JOBDIR[i],lib.JOBID[i],
'too many tracks' 367 print lib.JOBDIR[i],lib.JOBID[i],
'SEGVIOL encountered' 371 print lib.JOBDIR[i],lib.JOBID[i],
'RFIO error encountered' 372 remark =
'rfio error' 375 print lib.JOBDIR[i],lib.JOBID[i],
'Request exceeds quota' 376 if exceptionCaught == 1:
377 print lib.JOBDIR[i],lib.JOBID[i],
'Exception caught in cmsrun' 378 remark =
'Exception caught' 381 print 'milleBinary???.dat file not found or empty' 382 remark =
'empty milleBinary' 383 if emptyDatOnFarm > 0:
384 print '...but already empty on farm so OK (or check job',i+1,
'yourself...)' 388 print lib.JOBDIR[i],lib.JOBID[i],
'Command not found' 389 remark =
'cmd not found' 392 print lib.JOBDIR[i],lib.JOBID[i],
'Insufficient privileges to rfcp files' 393 remark =
'Could not rfcp files' 396 print lib.JOBDIR[i],lib.JOBID[i],
'Pede did not end normally' 397 remark =
'pede failed' 400 print lib.JOBDIR[i],lib.JOBID[i],
'Problems in running Pede:' 402 remark =
'pede error' 406 print lib.JOBDIR[i],lib.JOBID[i],
'Warnings in running Pede:' 408 remark =
'pede warnings' 411 print lib.JOBDIR[i],lib.JOBID[i],
'Job not ended' 412 remark =
'job not ended' 417 print lib.JOBDIR[i],lib.JOBID[i],
' -------- ',okStatus
420 lib.JOBNEVT[i] = nEvent
422 lib.JOBSTATUS[i] = disabled+okStatus
425 lib.JOBRUNTIME[i] = cputime
427 lib.JOBREMARK[i] = remark