CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_10/src/Alignment/MuonAlignmentAlgorithms/scripts/groupFilesInBlocks.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 
00003 import re,os,sys,shutil,math
00004 import optparse
00005 
# Rebuild a copy-pasteable form of the invoking command line: empty
# arguments become "" and arguments containing spaces are wrapped in double
# quotes.  The result is echoed and recorded in the output file header.
copyargs = sys.argv[:]
for i in range(len(copyargs)):
    if copyargs[i] == "":
        copyargs[i] = "\"\""
    if copyargs[i].find(" ") != -1:
        copyargs[i] = "\"%s\"" % copyargs[i]
commandline = " ".join(copyargs)

prog = sys.argv[0]

# optparse substitutes the script name for the literal token "%prog";
# a "%(prog)s" placeholder would be printed verbatim in the help text.
usage='./%prog NBLOCKS INFILE OUTFILE [options]\n'+\
  'takes list of files produced by findQualityFiles.py as INFILE,\n'+\
  'groups them into maximum NBLOCKS blocks with approximately similar #events.'


######################################################
# To parse commandline args


parser=optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
  help="debug verbosity level",
  type="int",
  default=0,
  dest="debug")

options,args=parser.parse_args()

# Use the positional arguments left over after option parsing rather than
# raw sys.argv, so that options given before the positionals
# (e.g. "-v 1 10 in.txt out.py") are not mistaken for NBLOCKS/INFILE/OUTFILE.
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n"+parser.format_help())

NBLOCKS = int(args[0])
INFILE = args[1]
OUTFILE = args[2]
00041 
00042 
00043 
def makeJobBlock(mylist, evtn):
  """Greedily carve one block of roughly ``evtn`` events out of ``mylist``.

  mylist -- list of tuples whose first element is an event count (the caller
            in this file passes ``(nevents, filename)`` tuples sorted by
            decreasing event count).
  evtn   -- target number of events for the block.

  The block always starts with ``mylist[0]``.  While the running total is
  below ``evtn``, the first not-yet-used entry that keeps the total strictly
  below ``evtn`` is added; if no such entry exists, the last unused entry is
  added regardless, so the total may overshoot ``evtn``.

  Returns ``(block, newlist, n)``: the entries chosen for the block (in the
  order they were picked), the unused entries in their original order, and
  the block's total event count.  An empty ``mylist`` yields ``([], [], 0)``.
  """
  if not mylist:
    # Nothing left to distribute: return an empty block instead of failing
    # with IndexError on mylist[0].
    return [], [], 0

  n = mylist[0][0]
  block = [mylist[0]]
  chosen = set([0])  # indices already used; a set gives O(1) membership tests
  nitems = len(mylist)

  while n < evtn and len(chosen) < nitems:
    # Index of the last unused entry; it is the fallback pick.  At least one
    # entry is unused here, so the scan stops at an index >= 0.
    last_i = nitems - 1
    while last_i in chosen:
      last_i -= 1

    # Prefer the first unused entry before last_i that still fits under evtn.
    pick = last_i
    for i in range(last_i):
      if i not in chosen and n + mylist[i][0] < evtn:
        pick = i
        break

    n += mylist[pick][0]
    block.append(mylist[pick])
    chosen.add(pick)

  # pick up unused elements, keeping their original relative order
  newlist = [entry for i, entry in enumerate(mylist) if i not in chosen]
  print("done makeJobBlock n = %s  len = %s" % (n, len(block)))
  return block, newlist, n
00081 
00082 
00083 
# A line that starts with '#' and has at least one more character.
comment1RE = re.compile (r'^#.+$')
# A quoted file name followed later on the line by "# <digits>"; group 1 is
# the file name, group 2 the event count.  At least one digit is required so
# that a line without a count is skipped instead of failing in int('').
fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d+).*$')

if not os.access(INFILE, os.F_OK):
  print("Cannot find input file  %s" % INFILE)
  sys.exit()

# The context manager closes the file even if reading raises.
with open(INFILE, "r") as fin:
  lines = fin.readlines()


eventsFiles = []   # (nevents, filename) tuples
ntotal = 0         # sum of all event counts
commentLines=[]    # '#' lines, copied into the output header later

for line in lines:
  if comment1RE.match(line):
    commentLines.append(line)

  # No 'continue' above: as before, every line (including '#' lines) is also
  # tested against the file-description pattern.
  match = fileLineRE.match(line)
  if match:
    nevents = int(match.group(2))
    eventsFiles.append((nevents, str(match.group(1))))
    ntotal += nevents
00116 
if len(eventsFiles)==0:
  print("no file description strings found")
  sys.exit()

# NBLOCKS == 0 would raise ZeroDivisionError below and a negative value would
# silently produce an empty result; stop with a message instead.
if NBLOCKS < 1:
  print("NBLOCKS must be a positive integer, got %s" % NBLOCKS)
  sys.exit()

# Largest files first, so every block is seeded with the biggest remaining
# file (makeJobBlock always starts from element 0).
eventsFiles.sort(reverse=True)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal)/NBLOCKS))
print("Total =  %s   per block = %s (would give total of  %s )   list length = %s"
      % (ntotal, evtPerJob, evtPerJob*NBLOCKS, len(eventsFiles)))
if eventsFiles[0][0] > evtPerJob:
  print("the biggest #evt is larger then #evt/block: %s > %s" % (eventsFiles[0][0], evtPerJob))
  print("consider lowering NBLOCKS")


jobsBlocks=[]        # list of (block, nevents_in_block)
temp = eventsFiles   # files not yet assigned to a block

tt = 0               # running total of events assigned so far
for j in range(NBLOCKS):
  print(j)
  if len(temp)==0:
    print("done!")
    break
  block, temp, nn = makeJobBlock(temp,evtPerJob)
  tt+=nn
  if len(block)>0:
    jobsBlocks.append((block,nn))
    print(block)
  else:
    print("empty block!")

print(tt)
print(commandline)
00158 
00159 
# Write the result as a Python fragment defining fileNamesBlocks, a list of
# lists of file names, preceded by '###' header comments that record how the
# file was produced.  The context manager closes the file even if a write
# raises.
with open(OUTFILE, "w") as fout:
  fout.write("### job-split file list produced by:\n")
  fout.write("### "+commandline+"\n")
  fout.write("### Total #evt= "+str(ntotal)+"  #files ="+str(len(eventsFiles))+"  per job #evt="
             +str(evtPerJob)+" (would give total of "+str(evtPerJob*NBLOCKS)+")\n###\n")
  fout.write("### previously produced by:\n")
  fout.write("".join(commentLines))
  fout.write("\nfileNamesBlocks = [\n")

  lastBlock = len(jobsBlocks)-1
  for b, (blockFiles, blockEvents) in enumerate(jobsBlocks):
    fout.write('  [ # job '+str(b)+' with nevt='+str(blockEvents)+'\n')
    lastFile = len(blockFiles)-1
    for i, entry in enumerate(blockFiles):
      # entry[0] is the event count, entry[1] the file name; every element
      # except the last one in its list is followed by a comma.
      comma = "" if i==lastFile else ","
      fout.write("    '"+ entry[1] +"'"+comma+" # "+ str(entry[0]) + "\n")
    commax = "" if b==lastBlock else ","
    fout.write('  ]'+commax+'\n')
  fout.write(']\n')
00183 fout.close()