14 import Alignment.MillePedeAlignmentAlgorithm.mpslib.Mpslibclass
as mpslib
# Job bookkeeping object from the mpslib module; its JOBID/JOBDIR/JOBSTATUS
# lists and its mssDir/pedeMem attributes are read by the checks below.
19 lib = mpslib.jobdatabase()
# Absolute path of the EOS command-line client.
25 eos =
'/afs/cern.ch/project/eos/installation/cms/bin/eos.select'
# Long listing of lib.mssDir, executed once through the shell.  stderr is
# merged into the captured text, which is split into lines; the list is
# iterated further down when the milleBinary size is looked up.
26 command = eos+
' ls -l '+lib.mssDir
27 eoslsoutput = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=
True).
split(
'\n')
# Visit every job index held in the job database.
30 for i
in xrange(len(lib.JOBID)):
# Job status is tested by substring.  What is done for DISABLED or FETCH
# jobs is not visible here (the following original lines are elided).
64 if 'DISABLED' in lib.JOBSTATUS[i]:
67 if 'FETCH' in lib.JOBSTATUS[i]:
# --- STDOUT of job i ---
# Path of the job's STDOUT file below jobData/.
70 stdOut =
'jobData/'+lib.JOBDIR[i]+
'/STDOUT'
# A readable gzip-compressed copy is unpacked with gunzip before reading.
72 if os.access(stdOut+
'.gz', os.R_OK):
73 os.system(
'gunzip '+stdOut+
'.gz')
75 STDFILE = open(stdOut,
'r')
# Each search below tests `line` for one known message; the three quota
# patterns ignore case.  The loop that supplies `line` and the flags set on
# a match are not part of this excerpt (presumably one pass over STDFILE).
81 if re.search(re.compile(
'Unable to access quota space',re.M|re.I), line):
83 if re.search(re.compile(
'Unable to get quota space',re.M|re.I), line):
85 if re.search(re.compile(
'Disk quota exceeded',re.M|re.I), line):
87 if re.search(re.compile(
'CERN report: Job Killed',re.M), line):
89 if re.search(re.compile(
'Job finished',re.M), line):
91 if re.search(re.compile(
'connection timed out',re.M), line):
93 if re.search(re.compile(
'ConfigFileReadError',re.M), line):
95 if re.search(re.compile(
'0 bytes transferred',re.M), line):
97 if re.search(re.compile(
'command not found',re.M), line):
100 if re.search(re.compile(
'stage_put: Insufficient user privileges',re.M), line):
# CPU time: the integer captured from the "KSI2K seconds" message is
# divided by cpuFactor (defined outside this excerpt) and rounded to an int.
106 match = re.search(re.compile(
'This process used .+?(\d+) KSI2K seconds',re.M|re.I), line)
109 cputime = int(round(int(match.group(1))/cpuFactor))
# Echo the command, then compress STDOUT again; -f overwrites an existing .gz.
113 print 'gzip -f '+stdOut
114 os.system(
'gzip -f '+stdOut)
# --- cmsRun.out of job i ---
# The file is only opened when it is readable.
117 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/cmsRun.out'
118 if os.access(eazeLog, os.R_OK):
120 INFILE = open(eazeLog,
'r')
# Messages searched for in `line`.  The surrounding read loop and the
# actions taken on a match are elided from this excerpt.
124 if re.search(re.compile(
'<StorageStatistics>',re.M), line):
126 if re.search(re.compile(
'Time limit reached\.',re.M), line):
128 if re.search(re.compile(
'gives I\/O problem',re.M), line):
# Exit status 8001 may be quoted with either single or double quotes.
130 if re.search(re.compile(
'FrameworkError ExitStatus=[\'\"]8001[\'\"]',re.M), line):
132 if re.search(re.compile(
'too many tracks',re.M), line):
134 if re.search(re.compile(
'segmentation violation',re.M), line):
136 if re.search(re.compile(
'failed RFIO error',re.M), line):
138 if re.search(re.compile(
'Request exceeds quota',re.M), line):
# --- alignment.log of job i ---
# eazeLog is reused as the path variable for this log.
143 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/alignment.log'
# A readable gzip-compressed copy is unpacked with gunzip first.
146 if os.access(eazeLog+
'.gz', os.R_OK):
147 os.system(
'gunzip '+eazeLog+
'.gz')
150 if os.access(eazeLog, os.R_OK):
152 INFILE = open(eazeLog,
'r')
# Messages searched for in `line`.  The read loop and the flags set on a
# match are elided from this excerpt.
156 if re.search(re.compile(
'<StorageStatistics>',re.M), line):
158 if re.search(re.compile(
'EAZE\. Time limit reached\.',re.M), line):
160 if re.search(re.compile(
'GAF gives I\/O problem',re.M), line):
162 if re.search(re.compile(
'FrameworkError ExitStatus=[\'\"]8001[\'\"]',re.M), line):
164 if re.search(re.compile(
'too many tracks',re.M), line):
166 if re.search(re.compile(
'segmentation violation',re.M), line):
168 if re.search(re.compile(
'failed RFIO error',re.M), line):
170 if re.search(re.compile(
'Request exceeds quota',re.M), line):
173 if re.search(re.compile(
'Fatal Exception',re.M), line):
175 if re.search(re.compile(
'Exception caught in cmsRun',re.M), line):
178 if re.search(re.compile(
'AlignmentProducer::endOfJob\(\)',re.M), line):
# Event count: the sixth element of `array` is converted to int.  `array`
# is built outside this excerpt (presumably the split report line - TODO
# confirm).  PostSource and AfterSource lines are used only while nEvent
# is still 0.
180 if re.search(re.compile(
'FwkReport -i main_input:sourc',re.M), line):
182 nEvent = int(array[5])
183 if nEvent==0
and re.search(re.compile(
'FwkReport -i PostSource',re.M), line):
185 nEvent = int(array[5])
187 if nEvent==0
and re.search(re.compile(
'FwkReport -i AfterSource',re.M), line):
189 nEvent = int(array[5])
# Compress the log again when logZipped equals the string 'true'
# (logZipped is assigned outside this excerpt).
192 if logZipped ==
'true':
193 os.system(
'gzip '+eazeLog)
# Message for a log that could not be tested; the branch that selects it is
# elided (presumably the else of the os.access test above).
196 print 'mps_check.py cannot find',eazeLog,
'to test'
# --- milleBinary output of job i ---
# File name carries the 1-based job number, zero-padded to three digits.
202 milleOut =
'milleBinary%03d.dat' % (i+1)
# Walk the EOS listing captured at start-up; the fifth whitespace-separated
# field of a listing line is taken as the output size (presumably the size
# column of `ls -l`).  The test that selects the matching line is elided.
208 for line
in eoslsoutput:
210 columns = line.split()
211 mOutSize = columns[4]
# --- pede.dump of job i ---
218 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/pede.dump'
# If a gzip-compressed dump is readable, any old /tmp/pede.dump is removed
# and the dump is decompressed to that scratch file with `gunzip -c`;
# eazeLog then points at the scratch copy.
# NOTE(review): the scratch path is fixed, so two checks running on the same
# host would share it - confirm this is acceptable.
219 if os.access(eazeLog+
'.gz', os.R_OK):
221 os.system(
'rm -f /tmp/pede.dump')
222 os.system(
'gunzip -c '+eazeLog+
'.gz > /tmp/pede.dump')
223 eazeLog =
'/tmp/pede.dump'
224 if os.access(eazeLog, os.R_OK):
225 INFILE = open(eazeLog,
'r') # open pede.dump
# Marker line searched for in `line` (presumably signals a regular Pede end;
# the action on a match is elided).
232 if re.search(re.compile(
'Millepede II.* ending',re.M), line):
# Peak memory: the text in front of ' GB' is captured, whitespace is removed
# from `mem` (assigned outside this excerpt, presumably match.group(1)), and
# the value is converted to float only if it looks like digits.digits.
235 match = re.search(re.compile(
'Peak dynamic memory allocation: (.+) GB',re.I), line)
238 mem = re.sub(
'\s',
'', mem)
240 if re.search(re.compile(
'^\d+\.\d+$',re.M), mem):
241 usedPedeMem = float(mem)
243 print 'mps_check.py: Found Pede peak memory allocation but extracted number is not a float:',mem
# Compare used with requested memory when both are positive.  lib.pedeMem is
# divided by 1024 first (presumably MB -> GB, since usedPedeMem is parsed
# from a "GB" message).
247 if lib.pedeMem > 0
and usedPedeMem > 0.:
248 memoryratio = usedPedeMem /(lib.pedeMem/1024.)
# NOTE(review): memoryratio as assigned above is a plain ratio, yet it is
# compared with 75. here; unless an elided line rescales it to percent, the
# second condition holds for every ratio below 75 - confirm the intended unit.
251 if lib.pedeMem > 4000
and memoryratio < 75. :
252 print 'Warning:',round(lib.pedeMem / 1024.,2),
'GB of memory for Pede requested, but only',round(memoryratio,1),
'\% of it has been used! Consider to request less memory in order to save resources.'
# Remove the scratch copy when the dump was read from /tmp/pede.dump.
255 if eazeLog ==
'/tmp/pede.dump':
256 os.system(
'rm /tmp/pede.dump')
# Message for a dump that could not be tested; the branch that selects it is
# elided (presumably the else of the os.access test on eazeLog).
260 print 'mps_check.py cannot find',eazeLog,
'to test'
# --- millepede.log of job i ---
263 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/millepede.log'
# A readable gzip-compressed copy is unpacked with gunzip first.
265 if os.access(eazeLog+
'.gz', os.R_OK):
266 os.system(
'gunzip '+eazeLog+
'.gz')
269 if os.access(eazeLog, os.R_OK):
271 INFILE = open(eazeLog,
'r')
# Lines reporting a non-descending step or constraint discrepancies are
# appended to the error text; lines reporting insufficient constraint
# equations are appended to the warning text.  Other actions taken inside
# these branches are elided from this excerpt.
275 if re.search(re.compile(
'step no descending',re.M), line):
277 pedeLogErrStr += line
278 if re.search(re.compile(
'Constraint equation discrepancies:',re.M), line):
280 pedeLogErrStr += line
282 if re.search(re.compile(
'insufficient constraint equations',re.M), line):
284 pedeLogWrnStr += line
# Compress the log again when logZipped equals the string 'true'
# (logZipped is assigned outside this excerpt).
287 if logZipped ==
'true':
288 os.system(
'gzip '+eazeLog)
# Message for a log that could not be tested (selecting branch elided).
290 print 'mps_check.py cannot find',eazeLog,
'to test'
# --- millepede.end of job i ---
294 eazeLog =
'jobData/'+lib.JOBDIR[i]+
'/millepede.end'
# Test for a readable gzip-compressed copy of the file.
296 if os.access(eazeLog+
'.gz', os.R_OK):
# Decompress millepede.end.gz.  The space after 'gunzip' is required:
# without it the shell is asked to run a command named 'gunzip<path>.gz',
# which does not exist, and the file stays compressed.
os.system('gunzip ' + eazeLog + '.gz')
# Read millepede.end when it is accessible.
300 if os.access(eazeLog, os.R_OK):
302 INFILE = open(eazeLog,
'r')
# The first optionally signed integer found in `line` is used as a status
# number (presumably Pede's exit code): 1 appends the line to the warning
# text, any other non-zero value appends it to the error text.
# NOTE(review): match is None for a line without digits; a guard may exist
# on the elided lines - confirm.
307 match = re.search(re.compile(
'([-+]?\d+)',re.M), line)
309 if int(match.group(1)) == 1:
311 pedeLogWrnStr += line
312 elif int(match.group(1)) != 0:
314 pedeLogErrStr += line
# Compress the file again when logZipped equals the string 'true'
# (logZipped is assigned outside this excerpt).
316 if logZipped ==
'true':
317 os.system(
'gzip '+eazeLog)
# Message for a file that could not be tested (selecting branch elided).
319 print 'mps_check.py cannot find',eazeLog,
'to test'
# --- Per-job report ---
# Each message is prefixed with the job directory and job id.  `remark`
# receives a short reason that is stored in lib.JOBREMARK afterwards.  Most
# of the conditions that select a message are elided from this excerpt.
327 print lib.JOBDIR[i],lib.JOBID[i],
'did not reach end of file'
330 print lib.JOBDIR[i],lib.JOBID[i],
'had quota space problem'
332 remark =
'eos quota space problem'
334 print lib.JOBDIR[i],lib.JOBID[i],
'had I/O problem'
337 print lib.JOBDIR[i],lib.JOBID[i],
'had Framework error 8001 problem'
338 remark =
'fwk error 8001'
341 print lib.JOBDIR[i],lib.JOBID[i],
'had connection timed out problem'
342 remark =
'connection timed out'
344 print lib.JOBDIR[i],lib.JOBID[i],
'had config file error'
345 remark =
'cfg file error'
348 print lib.JOBDIR[i],lib.JOBID[i],
'Job Killed (probably time exceeded)'
352 print lib.JOBDIR[i],lib.JOBID[i],
'ran into time limit'
# Flags such as tooManyTracks and exceptionCaught are compared with 1; they
# are set outside this excerpt.
354 if tooManyTracks == 1:
355 print lib.JOBDIR[i],lib.JOBID[i],
'too many tracks'
357 print lib.JOBDIR[i],lib.JOBID[i],
'SEGVIOL encountered'
361 print lib.JOBDIR[i],lib.JOBID[i],
'RFIO error encountered'
362 remark =
'rfio error'
365 print lib.JOBDIR[i],lib.JOBID[i],
'Request exceeds quota'
366 if exceptionCaught == 1:
367 print lib.JOBDIR[i],lib.JOBID[i],
'Exception caught in cmsrun'
368 remark =
'Exception caught'
# Missing or empty binary output; a positive emptyDatOnFarm adds a hint that
# the file was already empty on the farm, naming the 1-based job number.
371 print 'milleBinary???.dat file not found or empty'
372 remark =
'empty milleBinary'
373 if emptyDatOnFarm > 0:
374 print '...but already empty on farm so OK (or check job',i+1,
'yourself...)'
378 print lib.JOBDIR[i],lib.JOBID[i],
'Command not found'
379 remark =
'cmd not found'
382 print lib.JOBDIR[i],lib.JOBID[i],
'Insufficient privileges to rfcp files'
383 remark =
'Could not rfcp files'
# Pede-specific outcomes: abnormal end, errors, warnings.
386 print lib.JOBDIR[i],lib.JOBID[i],
'Pede did not end normally'
387 remark =
'pede failed'
390 print lib.JOBDIR[i],lib.JOBID[i],
'Problems in running Pede:'
392 remark =
'pede error'
396 print lib.JOBDIR[i],lib.JOBID[i],
'Warnings in running Pede:'
398 remark =
'pede warnings'
401 print lib.JOBDIR[i],lib.JOBID[i],
'Job not ended'
402 remark =
'job not ended'
# Summary line showing the resulting okStatus for the job.
407 print lib.JOBDIR[i],lib.JOBID[i],
' -------- ',okStatus
# Write the results for job i back into the job database lists.
410 lib.JOBNEVT[i] = nEvent
# The stored status is `disabled` followed by okStatus; `disabled` is defined
# outside this excerpt (presumably a prefix kept for disabled jobs - TODO
# confirm).
412 lib.JOBSTATUS[i] = disabled+okStatus
415 lib.JOBRUNTIME[i] = cputime
417 lib.JOBREMARK[i] = remark