Go to the documentation of this file.00001
00002
00003 import re,os,sys,shutil,math
00004 import optparse
00005
# Rebuild a printable copy of the invocation so it can be recorded in the
# output file: empty arguments become "" and arguments containing a space
# are wrapped in double quotes.
copyargs = sys.argv[:]
for i, arg in enumerate(copyargs):
    if arg == "":
        copyargs[i] = '""'
    elif " " in arg:
        copyargs[i] = '"%s"' % arg
commandline = " ".join(copyargs)

prog = sys.argv[0]

# optparse expands the literal token "%prog" in the usage text; the
# argparse-style "%(prog)s" would be printed verbatim.
usage = ('./%prog NBLOCKS INFILE OUTFILE [options]\n'
         'takes list of files produced by findQualityFiles.py as INFILE,\n'
         'groups them into maximum NBLOCKS blocks with approximately similar #events.')

parser = optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
                  help="debug verbosity level",
                  type="int",
                  default=0,
                  dest="debug")

options, args = parser.parse_args()

# Use the positional arguments left over after option parsing, so options
# may appear anywhere on the command line without being mistaken for
# NBLOCKS / INFILE / OUTFILE.
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n" + parser.format_help())

try:
    NBLOCKS = int(args[0])
except ValueError:
    raise SystemError(("NBLOCKS must be an integer, got %r\n\n" % args[0])
                      + parser.format_help())
# NBLOCKS is later used as a divisor and as a loop count.
if NBLOCKS < 1:
    raise SystemError(("NBLOCKS must be at least 1, got %d\n\n" % NBLOCKS)
                      + parser.format_help())
INFILE = args[1]
OUTFILE = args[2]
00041
00042
00043
def makeJobBlock(mylist, evtn):
    """Carve one block of roughly ``evtn`` events off the front of ``mylist``.

    ``mylist`` is a sequence of ``(nevents, name)`` tuples; only element
    ``[0]`` of each tuple is inspected here.  The caller in this file passes
    the list sorted by descending event count.

    The first entry is always taken.  Then, while the running total is below
    ``evtn`` and entries remain, the lowest-index unused entry that keeps the
    total strictly below ``evtn`` is added; if no entry fits before the last
    unused one is reached, that last unused entry is added regardless of its
    size.

    Returns a tuple ``(block, newlist, n)``:
      block   -- the selected entries, in the order they were picked
      newlist -- the entries not selected, in their original order
      n       -- the summed event count of ``block``

    An empty ``mylist`` yields ``([], [], 0)``.
    """
    if not mylist:
        return [], [], 0

    size = len(mylist)
    n = mylist[0][0]
    picked = [0]            # indices in the order they were selected
    taken = set(picked)     # same indices, for O(1) membership tests
    last_free = size - 1    # highest index not selected yet

    while n < evtn and len(picked) < size:
        # Selected indices are never released, so the highest free index
        # can only move downwards between iterations.
        while last_free in taken:
            last_free -= 1
        # last_free itself always satisfies the condition, so the scan
        # below is guaranteed to stop on some index.
        for i in range(size):
            if i in taken:
                continue
            if i == last_free or n + mylist[i][0] < evtn:
                break
        n += mylist[i][0]
        picked.append(i)
        taken.add(i)

    block = [mylist[i] for i in picked]
    newlist = [entry for i, entry in enumerate(mylist) if i not in taken]
    # Single pre-formatted argument: prints identically on Python 2 and 3.
    print("done makeJobBlock n = %s  len = %s" % (n, len(block)))
    return block, newlist, n
00081
00082
00083
# A comment line: '#' followed by at least one more character.
comment1RE = re.compile(r'^#.+$')
# A file line: a single-quoted name followed later on the line by
# "# <digits>".  At least one digit is required so that the int()
# conversion below can never be handed an empty string.
fileLineRE = re.compile(r'^.*\'(.*)\'.+# (\d+).*$')

if not os.access(INFILE, os.F_OK):
    # sys.exit(message) writes the message to stderr and exits with status 1.
    sys.exit("Cannot find input file  " + INFILE)

with open(INFILE, "r") as fin:
    lines = fin.readlines()

eventsFiles = []     # (event count, file name) per recognised file line
ntotal = 0           # sum of all event counts
commentLines = []    # comment lines, copied verbatim into the output later

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    # A line is tested against both patterns independently, so a comment
    # line that also looks like a file line is recorded in both lists.
    match = fileLineRE.match(line)
    if match:
        nevt = int(match.group(2))
        eventsFiles.append((nevt, str(match.group(1))))
        ntotal += nevt

if not eventsFiles:
    sys.exit("no file description strings found")
00120
00121
00122
00123
00124
00125
00126
00127
00128
# Largest event counts first: makeJobBlock always seeds a block with the
# first entry of the list it is given.
eventsFiles.sort(reverse=True)

# NBLOCKS is the divisor below; fail with a clear message instead of a
# ZeroDivisionError.
if NBLOCKS < 1:
    sys.exit("NBLOCKS must be at least 1, got %s" % NBLOCKS)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal) / NBLOCKS))
# Each print below takes one pre-formatted string so that the output is the
# same on Python 2 and Python 3.
print("Total =  %s  per block = %s (would give total of  %s )  list length = %s"
      % (ntotal, evtPerJob, evtPerJob * NBLOCKS, len(eventsFiles)))
if eventsFiles[0][0] > evtPerJob:
    print("the biggest #evt is larger then #evt/block: %s > %s"
          % (eventsFiles[0][0], evtPerJob))
    print("consider lowering NBLOCKS")

jobsBlocks = []      # list of (block, event count of that block)
temp = eventsFiles   # entries not yet assigned to a block

tt = 0               # events assigned so far
for j in range(NBLOCKS):
    print(j)
    if not temp:
        print("done!")
        break
    block, temp, nn = makeJobBlock(temp, evtPerJob)
    tt += nn
    if block:
        jobsBlocks.append((block, nn))
        print(block)
    else:
        print("empty block!")

# Entries still unassigned after NBLOCKS blocks do not appear in the output;
# report them instead of dropping them silently.
if temp:
    print("warning: %d file(s) were not assigned to any block" % len(temp))

print(tt)
print(commandline)
00158
00159
# Write the grouped file list as a Python list-of-lists literal named
# fileNamesBlocks, preceded by '###' header lines recording how the file
# was produced.  The with-statement closes the file even if a write fails.
with open(OUTFILE, "w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### " + commandline + "\n")
    fout.write("### Total #evt= " + str(ntotal)
               + " #files =" + str(len(eventsFiles))
               + " per job #evt=" + str(evtPerJob)
               + " (would give total of " + str(evtPerJob * NBLOCKS) + ")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    lastBlock = len(jobsBlocks) - 1
    for b, (files, nevt) in enumerate(jobsBlocks):
        fout.write(' [ # job ' + str(b) + ' with nevt=' + str(nevt) + '\n')
        lastFile = len(files) - 1
        for i, (count, name) in enumerate(files):
            # The separator precedes the trailing "# count" comment, and is
            # omitted after the final entry.
            comma = "" if i == lastFile else ","
            fout.write(" '" + name + "'" + comma + " # " + str(count) + "\n")
        commax = "" if b == lastBlock else ","
        fout.write(' ]' + commax + '\n')
    fout.write(']\n')