CMS 3D CMS Logo

groupFilesInBlocks.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
3 from __future__ import print_function
4 from builtins import range
5 import re,os,sys,shutil,math
6 import optparse
7 
# Rebuild the invoking command line as one printable string (it is written
# into the header of the output file): empty arguments are shown as "" and
# arguments containing a space are wrapped in double quotes.
copyargs = []
for arg in sys.argv:
    if arg == "":
        arg = "\"\""
    elif " " in arg:
        arg = "\"%s\"" % arg
    copyargs.append(arg)
commandline = " ".join(copyargs)
15 
prog = sys.argv[0]

# Usage text handed to optparse.  optparse substitutes the program name for
# the literal token "%prog" only; the argparse-style "%(prog)s" placeholder
# would be printed verbatim in the help output.
usage = (
    './%prog NBLOCKS INFILE OUTFILE [options]\n'
    'takes list of files produced by findQualityFiles.py as INFILE,\n'
    'groups them into maximum NBLOCKS blocks with approximately similar #events.'
)
21 
22 
23 
25 
26 
parser = optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
                  help="debug verbosity level",
                  type="int",
                  default=0,
                  dest="debug")

options, args = parser.parse_args()

# Read the positional arguments from what optparse left over after removing
# the options.  Indexing sys.argv directly breaks as soon as an option is
# given before the positionals (e.g. "-v 1 NBLOCKS INFILE OUTFILE").
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n" + parser.format_help())

NBLOCKS = int(args[0])
INFILE = args[1]
OUTFILE = args[2]

# NBLOCKS is used as a divisor and as a loop count further down, so it has
# to be at least 1.
if NBLOCKS < 1:
    raise SystemError("NBLOCKS must be a positive integer.\n\n" + parser.format_help())
43 
44 
45 
def makeJobBlock(mylist, evtn):
    """Build one block of entries whose event counts add up to about evtn.

    The first entry of mylist always opens the block.  While the running
    total is below evtn, one more entry is added per pass: the first unused
    entry (in list order) that keeps the total strictly below evtn, or, if
    none before it fits, the last unused entry of the list regardless of its
    size.  The block can therefore end above evtn.

    Args:
        mylist: sequence of entries whose first element is the event count.
            The order of the list determines which candidate is tried first.
        evtn: target number of events for the block.

    Returns:
        A tuple (block, newlist, n): the entries selected for this block, the
        entries left unused (original order preserved), and the summed event
        count of the block.  An empty mylist yields ([], [], 0).
    """
    if not mylist:
        # Nothing to group; return an empty block instead of failing on
        # mylist[0].
        print("done makeJobBlock n =", 0, " len =", 0)
        return [], [], 0

    total = len(mylist)
    n = mylist[0][0]
    block = [mylist[0]]
    chosen = {0}  # indices already placed in the block

    while n < evtn and len(chosen) < total:
        # Index of the last entry that is still unused.  It is the fallback
        # pick and does not change while scanning the candidates below.
        last_i = total - 1
        while last_i in chosen:
            last_i -= 1

        for i in range(last_i + 1):
            if i in chosen:
                continue
            # Take the first unused entry that still fits below the target;
            # on reaching the last unused entry take it unconditionally so
            # every pass makes progress.
            if i == last_i or n + mylist[i][0] < evtn:
                n += mylist[i][0]
                block.append(mylist[i])
                chosen.add(i)
                break

    # pick up unused elements
    newlist = [entry for i, entry in enumerate(mylist) if i not in chosen]
    print("done makeJobBlock n =", n, " len =", len(block))
    return block, newlist, n
83 
84 
85 
# A line that starts with '#' followed by at least one more character.
comment1RE = re.compile(r'^#.+$')
# A file entry: group 1 is the text between single quotes, group 2 is the
# run of digits that follows "# ".  At least one digit is required so that a
# match always yields a count that int() can convert.
fileLineRE = re.compile(r'^.*\'(.*)\'.+# (\d+).*$')
# Alternative (unused) pattern capturing two comma-separated numbers:
#fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d*),(\d*).*$')
89 
# Stop early when the input list does not exist.  Exit with a non-zero
# status so that calling scripts can tell the run failed.
if not os.access(INFILE, os.F_OK):
    print("Cannot find input file ", INFILE)
    sys.exit(1)

# Read the whole input; the with-block closes the file even if reading fails.
with open(INFILE, "r") as fin:
    lines = fin.readlines()
97 
98 
eventsFiles = []    # (number of events, file name) for every file entry
ntotal = 0          # sum of the event counts of all file entries
commentLines = []   # comment lines of the input, copied to the output later

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    match = fileLineRE.match(line)
    # Skip matches whose event-count group is empty: int('') would raise
    # ValueError and abort the whole run.
    if match and match.group(2):
        nevents = int(match.group(2))
        eventsFiles.append((nevents, str(match.group(1))))
        ntotal += nevents

if not eventsFiles:
    print("no file description strings found")
    # Non-zero status: nothing could be parsed, so the run did not succeed.
    sys.exit(1)
122 
123 #print "len=", len(eventsFiles), ntotal
124 #tmp = set(eventsFiles)
125 #eventsFiles = list(tmp)
126 #ntotal = 0
127 #for ff in eventsFiles: ntotal += ff[0]
128 #print "len=", len(eventsFiles), ntotal
129 #sys.exit()
130 
# Order the entries from the largest to the smallest event count.
eventsFiles.sort(reverse=True)

# Number of events each block should hold, rounded up.
evtPerJob = int(math.ceil(float(ntotal) / NBLOCKS))
print("Total = ", ntotal, " per block =", evtPerJob,
      "(would give total of ", evtPerJob * NBLOCKS, ")",
      " list length =", len(eventsFiles))

# Warn when the largest single file already exceeds the per-block target.
largest_nevt = eventsFiles[0][0]
if largest_nevt > evtPerJob:
    print("the biggest #evt is larger then #evt/block:", largest_nevt, ">", evtPerJob)
    print("consider lowering NBLOCKS")
139 
140 
# Fill at most NBLOCKS blocks; each call to makeJobBlock consumes entries
# from `temp` and hands back whatever is still unassigned.
jobsBlocks = []
temp = eventsFiles

tt = 0  # running sum of the events placed in blocks
for j in range(NBLOCKS):
    print(j)
    if not temp:
        print("done!")
        break
    block, temp, nn = makeJobBlock(temp, evtPerJob)
    tt += nn
    if not block:
        print("empty block!")
        continue
    jobsBlocks.append((block, nn))
    print(block)

print(tt)
print(commandline)
160 
161 
# Write the result as a Python list of lists named fileNamesBlocks, preceded
# by '###' header lines and the comment lines collected from the input.
# The with-block guarantees the file is closed even if a write fails.
with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### " + commandline + "\n")
    fout.write("### Total #evt= " + str(ntotal) + " #files =" + str(len(eventsFiles))
               + " per job #evt=" + str(evtPerJob)
               + " (would give total of" + str(evtPerJob * NBLOCKS) + ")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    last_block = len(jobsBlocks) - 1
    for b, (block_files, block_nevt) in enumerate(jobsBlocks):
        fout.write(' [ # job ' + str(b) + ' with nevt=' + str(block_nevt) + '\n')
        last_file = len(block_files) - 1
        for i, entry in enumerate(block_files):
            # No separator after the last file of a block.
            comma = "" if i == last_file else ","
            fout.write(" '" + entry[1] + "'" + comma + " # " + str(entry[0]) + "\n")
        # No separator after the last block.
        commax = "" if b == last_block else ","
        fout.write(' ]' + commax + '\n')
    fout.write(']\n')
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
def makeJobBlock(mylist, evtn)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
#define str(s)