CMS 3D CMS Logo

groupFilesInBlocks.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 import re,os,sys,shutil,math
4 import optparse
5 
# Rebuild a printable copy of the invoking command line; it is echoed to the
# terminal and recorded in the header of the output file.
copyargs = sys.argv[:]
for i, arg in enumerate(copyargs):
    if arg == "":
        # make empty arguments visible
        copyargs[i] = "\"\""
    elif " " in arg:
        # quote arguments containing blanks so the line can be pasted back
        copyargs[i] = "\"%s\"" % arg
commandline = " ".join(copyargs)

prog = sys.argv[0]

# optparse substitutes only the literal token "%prog" in a usage string; the
# dict-style "%(prog)s" placeholder would be shown to the user verbatim.
usage = ('./%prog NBLOCKS INFILE OUTFILE [options]\n'
         'takes list of files produced by findQualityFiles.py as INFILE,\n'
         'groups them into maximum NBLOCKS blocks with approximately similar #events.')
19 
20 
21 ######################################################
22 # To parse commandline args
23 
24 
parser = optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
                  help="debug verbosity level",
                  type="int",
                  default=0,
                  dest="debug")

options, args = parser.parse_args()

# Take the positional arguments from what optparse left over rather than from
# raw sys.argv: otherwise an option given before (or between) the positionals
# would be mistaken for NBLOCKS / INFILE / OUTFILE.
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n" + parser.format_help())

NBLOCKS = int(args[0])
INFILE = args[1]
OUTFILE = args[2]

# NBLOCKS is later used as a divisor and as a loop count, so it must be >= 1.
if NBLOCKS < 1:
    raise SystemError("NBLOCKS must be a positive integer.\n\n" + parser.format_help())
41 
42 
43 
def makeJobBlock(mylist, evtn):
    """Greedily pick entries of *mylist* until their event sum reaches *evtn*.

    The first entry is always taken.  While the running sum is below *evtn*,
    the first not-yet-used entry (in list order) that keeps the sum strictly
    below *evtn* is added; if no such entry exists, the last unused entry of
    the list is added instead, even if that overshoots *evtn*.

    Parameters:
        mylist: sequence of records whose element [0] is an event count.
            The caller in this file passes (nevents, filename) tuples sorted
            by descending event count.
        evtn: target number of events for the block.

    Returns:
        (block, newlist, n): the chosen records in the order they were
        picked, the remaining records in their original order, and the event
        sum of the block.  An empty *mylist* yields ([], [], 0).
    """
    block = []
    n = 0
    chosen = set()  # indices already placed in the block
    if mylist:
        n = mylist[0][0]
        block.append(mylist[0])
        chosen.add(0)

    # Index of the last unused entry.  Entries are only ever added to
    # `chosen`, so this pointer can only move towards the front.
    last_free = len(mylist) - 1
    while n < evtn and len(chosen) < len(mylist):
        while last_free in chosen:
            last_free -= 1
        # Fallback when nothing fits: take the last unused entry.
        pick = last_free
        for i in range(last_free):
            if i not in chosen and n + mylist[i][0] < evtn:
                pick = i
                break
        n += mylist[pick][0]
        block.append(mylist[pick])
        chosen.add(pick)

    # pick up unused elements
    newlist = [item for i, item in enumerate(mylist) if i not in chosen]
    # Single pre-formatted string: same text under Python 2 and Python 3.
    print("done makeJobBlock n = %s  len = %s" % (n, len(block)))
    return block, newlist, n
81 
82 
83 
# A line starting with '#' is a comment carried over into the output header.
comment1RE = re.compile(r'^#.+$')
# A file line holds a single-quoted file name followed by "# <nevents>".
# At least one digit is required: with an optional number a line without a
# count would still match and int('') would abort the whole script.
fileLineRE = re.compile(r'^.*\'(.*)\'.+# (\d+).*$')
# Alternative input format carrying two comma-separated numbers (unused):
#fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d*),(\d*).*$')

if not os.access(INFILE, os.F_OK):
    print("Cannot find input file  %s" % INFILE)
    # non-zero status so that calling scripts can detect the failure
    sys.exit(1)

# The context manager closes the file even if reading fails.
with open(INFILE, "r") as fin:
    lines = fin.readlines()


eventsFiles = []   # (nevents, filename) for every file line found
ntotal = 0         # sum of nevents over all file lines
commentLines = []  # comment lines of the input, kept verbatim

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    # Deliberately not an else-branch: a line is tested against both patterns.
    match = fileLineRE.match(line)
    if match:
        nevents = int(match.group(2))
        eventsFiles.append((nevents, str(match.group(1))))
        ntotal += nevents

if len(eventsFiles) == 0:
    print("no file description strings found")
    sys.exit(1)
120 
121 #print "len=", len(eventsFiles), ntotal
122 #tmp = set(eventsFiles)
123 #eventsFiles = list(tmp)
124 #ntotal = 0
125 #for ff in eventsFiles: ntotal += ff[0]
126 #print "len=", len(eventsFiles), ntotal
127 #sys.exit()
128 
# Largest entries first: makeJobBlock scans its input in list order.
eventsFiles.sort(reverse=True)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal)/NBLOCKS))
# Messages are pre-formatted into one string so that the printed text is the
# same under Python 2 and Python 3.
print("Total =  %s  per block = %s (would give total of  %s )  list length = %s"
      % (ntotal, evtPerJob, evtPerJob*NBLOCKS, len(eventsFiles)))
if eventsFiles[0][0] > evtPerJob:
    print("the biggest #evt is larger then #evt/block: %s > %s"
          % (eventsFiles[0][0], evtPerJob))
    print("consider lowering NBLOCKS")


jobsBlocks = []      # list of (block, nevents_in_block)
temp = eventsFiles   # entries not yet assigned to a block

tt = 0               # running total of events assigned so far
for j in range(NBLOCKS):
    print(j)
    if len(temp) == 0:
        print("done!")
        break
    block, temp, nn = makeJobBlock(temp, evtPerJob)
    tt += nn
    if len(block) > 0:
        jobsBlocks.append((block, nn))
        print(block)
    else:
        print("empty block!")

# Anything still in `temp` is absent from jobsBlocks and therefore from the
# output file; report it instead of dropping it silently.
if temp:
    print("WARNING: %d file(s) were not assigned to any block" % len(temp))

print(tt)
print(commandline)
158 
159 
# Write the grouped file list as a Python fragment defining fileNamesBlocks.
# The context manager closes the file even if one of the writes raises.
with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### "+commandline+"\n")
    # A blank is inserted after "total of" so the number no longer runs into
    # the preceding word.
    fout.write("### Total #evt= "+str(ntotal)+" #files ="+str(len(eventsFiles))+" per job #evt="
               +str(evtPerJob)+" (would give total of "+str(evtPerJob*NBLOCKS)+")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    lastBlock = len(jobsBlocks) - 1
    for b, (block, nevt) in enumerate(jobsBlocks):
        fout.write(' [ # job '+str(b)+' with nevt='+str(nevt)+'\n')
        lastFile = len(block) - 1
        for i, entry in enumerate(block):
            # entry is (nevents, filename); no separator after the last item
            comma = "" if i == lastFile else ","
            fout.write(" '"+entry[1]+"'"+comma+" # "+str(entry[0])+"\n")
        commax = "" if b == lastBlock else ","
        fout.write(' ]'+commax+'\n')
    fout.write(']\n')
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
def makeJobBlock(mylist, evtn)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18