CMS 3D CMS Logo

groupFilesInBlocks.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 from __future__ import print_function
4 import re,os,sys,shutil,math
5 import optparse
6 
# Rebuild a printable copy of the command line so it can be echoed to the
# terminal and recorded in the header of the output file.
copyargs = list(sys.argv)
for idx, arg in enumerate(copyargs):
    if arg == "":
        # make an empty argument visible
        arg = "\"\""
    if arg.find(" ") != -1:
        # quote arguments that contain blanks
        arg = "\"%s\"" % arg
    copyargs[idx] = arg
commandline = " ".join(copyargs)

prog = sys.argv[0]
16 
# optparse substitutes the literal token "%prog" with the script name; the
# argparse-style "%(prog)s" is not expanded and would be printed verbatim.
usage='./%prog NBLOCKS INFILE OUTFILE [options]\n'+\
      'takes list of files produced by findQualityFiles.py as INFILE,\n'+\
      'groups them into maximum NBLOCKS blocks with approximately similar #events.'


######################################################
# To parse commandline args


parser=optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
                  help="debug verbosity level",
                  type="int",
                  default=0,
                  dest="debug")

options,args=parser.parse_args()

# Read the positionals from what optparse left over rather than from the raw
# sys.argv, so that options given before or between them do not shift
# NBLOCKS / INFILE / OUTFILE.
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n"+parser.format_help())

NBLOCKS = int(args[0])
INFILE = args[1]
OUTFILE = args[2]

# NBLOCKS is used as a divisor and as a loop count further down.
if NBLOCKS < 1:
    raise SystemError("NBLOCKS must be a positive integer.\n\n"+parser.format_help())
42 
43 
44 
def makeJobBlock(mylist, evtn):
    """Carve one block of entries out of mylist, aiming at evtn events.

    mylist is a sequence of records whose first element is an event count
    (the script passes (count, file name) tuples sorted by decreasing
    count).  The block is seeded with the first entry.  While the running
    total is below evtn, the first unused entry (in list order) that keeps
    the total strictly below evtn is added; if no such entry exists, the
    last unused entry is added even though the total then reaches or
    exceeds evtn.

    Returns a tuple (block, newlist, n):
      block   -- entries selected for this block
      newlist -- entries left over, in their original order
      n       -- summed event count of block

    An empty mylist yields ([], [], 0).  A one-line summary is printed for
    every non-empty call.
    """
    if not mylist:
        return [], [], 0

    n = mylist[0][0]
    block = [mylist[0]]
    chosen = {0}                   # indices already placed in the block
    last_free = len(mylist) - 1    # highest index that may still be unused

    while n < evtn and len(chosen) < len(mylist):
        # Chosen indices are never released, so this only ever moves down.
        while last_free in chosen:
            last_free -= 1

        # Default to the last unused entry; prefer the first earlier unused
        # entry that still keeps the total below the target.
        pick = last_free
        for i in range(last_free):
            if i not in chosen and n + mylist[i][0] < evtn:
                pick = i
                break

        n += mylist[pick][0]
        block.append(mylist[pick])
        chosen.add(pick)

    # pick up unused elements
    newlist = [item for i, item in enumerate(mylist) if i not in chosen]
    print("done makeJobBlock n =",n," len =",len(block))
    return block, newlist, n
82 
83 
84 
# A line that starts with '#' and has at least one more character.
comment1RE = re.compile (r'^#.+$')
# A file line: a single-quoted name, later followed by "# <event count>".
# The count needs at least one digit; an empty capture would otherwise be
# handed to int() by the parsing loop and raise ValueError.
fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d+).*$')
#fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d*),(\d*).*$')
88 
if not os.access(INFILE, os.F_OK):
    print("Cannot find input file ", INFILE)
    # Non-zero status so that calling scripts can see the failure.
    sys.exit(1)

# The with-block closes the file even if reading raises.
with open(INFILE, "r") as fin:
    lines = fin.readlines()
96 
97 
eventsFiles = []    # (event count, file name) for every recognised file line
ntotal = 0          # sum of all event counts
commentLines=[]     # '#' lines, echoed into the header of the output file

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    # Deliberately not an else-branch: as before, every line is tested
    # against both patterns.
    match = fileLineRE.match(line)
    if match:
        nevents = int(match.group(2))
        eventsFiles.append((nevents, str(match.group(1))))
        ntotal += nevents

if not eventsFiles:
    print("no file description strings found")
    # Non-zero status so that calling scripts can see the failure.
    sys.exit(1)
121 
122 #print "len=", len(eventsFiles), ntotal
123 #tmp = set(eventsFiles)
124 #eventsFiles = list(tmp)
125 #ntotal = 0
126 #for ff in eventsFiles: ntotal += ff[0]
127 #print "len=", len(eventsFiles), ntotal
128 #sys.exit()
129 
# Largest entries first: makeJobBlock seeds every block with the first
# element of the list it is given.
eventsFiles.sort(reverse=True)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal)/NBLOCKS))
print("Total = ",ntotal, " per block =", evtPerJob,"(would give total of ", evtPerJob*NBLOCKS, ")", " list length =",len(eventsFiles))
largest = eventsFiles[0][0]
if largest > evtPerJob:
    print("the biggest #evt is larger then #evt/block:",largest,">",evtPerJob)
    print("consider lowering NBLOCKS")


jobsBlocks=[]        # list of (block, events in block)
temp = eventsFiles   # entries not yet assigned to a block

tt = 0               # events assigned so far
for j in range(NBLOCKS):
    print(j)
    if not temp:
        print("done!")
        break
    block, temp, nn = makeJobBlock(temp,evtPerJob)
    tt+=nn
    if not block:
        print("empty block!")
        continue
    jobsBlocks.append((block,nn))
    print(block)

print(tt)
print(commandline)
159 
160 
# Write the result as a Python list of lists named fileNamesBlocks, preceded
# by '###' header lines.  The with-block closes the file even if a write fails.
with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### "+commandline+"\n")
    fout.write("### Total #evt= "+str(ntotal)+" #files ="+str(len(eventsFiles))+" per job #evt="
               +str(evtPerJob)+" (would give total of "+str(evtPerJob*NBLOCKS)+")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    lastBlock = len(jobsBlocks) - 1
    for b, (blockFiles, blockEvents) in enumerate(jobsBlocks):
        fout.write(' [ # job '+str(b)+' with nevt='+str(blockEvents)+'\n')
        lastFile = len(blockFiles) - 1
        for i, entry in enumerate(blockFiles):
            # entry is (event count, file name); no comma after the last one
            comma = "" if i == lastFile else ","
            fout.write(" '"+ entry[1] +"'"+comma+" # "+ str(entry[0]) + "\n")
        # no comma after the last block
        commax = "" if b == lastBlock else ","
        fout.write(' ]'+commax+'\n')
    fout.write(']\n')
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
def makeJobBlock(mylist, evtn)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
#define str(s)