CMS 3D CMS Logo

inputFilesetParser.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import os,csv
3 from RecoLuminosity.LumiDB import csvSelectionParser,selectionParser,CommonUtil
def filehasHeader(f):
    '''
    Tell whether the next line of an open CSV result file is a header row.

    Reads exactly one line from f (the file position advances; the caller
    has to rewind if it wants to parse the file from the start) and treats
    it as a header when its first comma-separated field is 'run', ignoring
    case and surrounding whitespace.

    input : f, a file-like object whose readline() returns text
    output: True if the line is a header, False otherwise (also at end of file)
    '''
    first_field = f.readline().split(',')[0]
    # strip() so that a one-column header ("Run\n") or a padded one
    # (" Run ,...") is still recognised; without it the trailing newline
    # or the padding defeats the comparison.
    return first_field.strip().lower() == 'run'
11 
# Constructor: takes one '+'-separated string of file names. Every entry except
# the last is kept as a CSV result file name; the last entry is kept as the
# selection file name (it may be the empty string).
# NOTE(review): this listing skips original lines 17, 19, 23 and 27 (the embedded
# numbering jumps), so the body of the '.csv' branch and whatever consumes
# inputfilecontent are not visible here - confirm against the real file.
13  def __init__(self,inputfilename):
14  filelist=inputfilename.split('+')
15  self.__inputresultfiles=filelist[0:-1]
16  self.__inputselectionfile=filelist[-1]
# rows of all result files, header rows excluded (filled in by the loop below)
18  self.__inputResult=[]
20  if len(self.__inputselectionfile)!=0:
# the extension of the selection file decides which branch handles it
21  basename,extension=os.path.splitext(self.__inputselectionfile)
22  if extension=='.csv':#if file ends with .csv,use csv parser,else parse as json file
24  else:
25  selectf=open(self.__inputselectionfile,'r')
26  inputfilecontent=selectf.read()
# NOTE(review): no selectf.close() is visible in this listing - check for a leaked handle
28  if len(self.__inputresultfiles)!=0:
# header is not used again in the visible lines
29  header=''
30  for f in self.__inputresultfiles:
31  ifile=open(f)
32  hasHeader=filehasHeader(ifile)
33  #hasHeader=csv.Sniffer().has_header(ifile.read(1024)) #sniffer doesn't work well , replace with custom
# filehasHeader consumed the first line, so rewind before handing the file to csv.reader
34  ifile.seek(0)
35  csvReader=csv.reader(ifile,delimiter=',')
36  irow=0
37  for row in csvReader:
# the first row of a file with a header replaces any header stored from an earlier file;
# every other row is appended to the shared result list
38  if hasHeader and irow==0:
39  self.__inputResultHeader=row
40  else:
41  self.__inputResult.append(row)
42  irow=irow+1
43  ifile.close()
def resultheader(self):
    '''
    output [headerfields], the header row stored for the input result files
    '''
    headerfields = self.__inputResultHeader
    return headerfields
def resultlines(self):
    '''
    output [valuefields], the data rows stored for the input result files
    '''
    valuerows = self.__inputResult
    return valuerows
def runsWithresult(self):
    '''
    Re-read every input result file and collect the distinct run numbers
    found in the first column. Rows whose first field is not an integer
    string (header lines, for instance) and empty rows are ignored.

    output: [run,run,...]
    '''
    # a dict is used as an insertion-ordered set of run numbers
    runs_seen = {}
    for resultfile in self.__inputresultfiles:
        # the with-block closes the file even if parsing fails
        with open(resultfile) as ifile:
            for row in csv.reader(ifile, delimiter=','):
                if not row:
                    # csv.reader yields [] for a blank line; row[0] would raise IndexError
                    continue
                field0 = str(row[0]).strip()
                if not CommonUtil.is_intstr(field0):
                    continue
                runs_seen[int(field0)] = None
    # hand back a real list, as documented, rather than a dict view
    return list(runs_seen)
# Body of a method whose def line (original line 63) is missing from this listing;
# presumably selectedRunsWithresult, which the __main__ demo calls - confirm against the real file.
# Returns, in the order given by self.runs(), the runs that also appear in
# self.runsWithresult(); an empty selection file name short-circuits to [].
# NOTE(review): the loop iterates the return value of self.runs() directly, so a
# None return from runs() would raise here - verify.
64  '''
65  output: [run,run,...]
66  '''
67  result=[]
68  if len(self.__inputselectionfile)==0:#actually no selected
69  return result
70  else:
71  runswithresult=self.runsWithresult()
72  selectedruns=self.runs()
73  for r in selectedruns:
# keep only the selected runs that already have a result
74  if r in runswithresult:
75  result.append(r)
76  return result
# Body of a method whose def line (original line 77) is missing from this listing;
# presumably selectedRunsWithoutresult, which the __main__ demo calls - confirm against the real file.
# Returns, in the order given by self.runs(), the runs that do NOT appear in
# self.runsWithresult(); an empty selection file name short-circuits to [].
# NOTE(review): the loop iterates the return value of self.runs() directly, so a
# None return from runs() would raise here - verify.
78  '''
79  output: [run,run,...]
80  '''
81  result=[]
82  if len(self.__inputselectionfile)==0:#actually no selected
83  return result
84  else:
85  runswithresult=self.runsWithresult()
86  selectedruns=self.runs()
87  for r in selectedruns:
# keep only the selected runs that have no result yet
88  if r not in runswithresult:
89  result.append(r)
90  return result
def selectionfilename(self):
    '''
    output: the name of the input selection file (last entry of the input string)
    '''
    selectionname = self.__inputselectionfile
    return selectionname
def mergeResultOnly(self):
    '''
    An empty selection file name means the caller only wants the pieces of
    output result files merged into one.
    output: True if the selection file name is empty, False otherwise
    '''
    selectionname = self.__inputselectionfile
    return not len(selectionname)
def resultfiles(self):
    '''
    output: [filename,filename,...], the input result file names
    '''
    filenames = self.__inputresultfiles
    return filenames
def resultHeader(self):
    '''
    output [headerfields]
    (same value as resultheader())
    '''
    headerfields = self.__inputResultHeader
    return headerfields
def resultInput(self):
    '''
    output [valuefields]
    (same value as resultlines())
    '''
    valuerows = self.__inputResult
    return valuerows
def fieldvalues(self,fieldname,fieldtype):
    '''
    given the input result field name and type, return the list of values

    input : fieldname, a column name that must appear in the result header
            fieldtype, one of 'int', 'unsigned int', 'float', 'string', 'str'
    output: [value,value,...], one converted value per stored result row
    raises: RuntimeError if the field is not in the header, if a value does
            not match the requested numeric type, or if fieldtype is unsupported
    '''
    try:
        fieldidx = self.__inputResultHeader.index(fieldname)
    except Exception:
        # a lookup failure is reported as RuntimeError, which is what callers see
        print('field ',fieldname,' not found')
        raise RuntimeError('field')
    result = []
    for r in self.__inputResult:
        stringvalue = r[fieldidx]
        if fieldtype in ('int','unsigned int'):
            if not CommonUtil.is_intstr(stringvalue):
                print('field ',fieldname,' is not integer type')
                raise RuntimeError('field')
            result.append(int(stringvalue))
        elif fieldtype in ('float',):
            if not CommonUtil.is_floatstr(stringvalue):
                print('field ',fieldname,' is not float type')
                raise RuntimeError('field')
            # the misspelled 'contine' that followed this append raised
            # NameError for every float value; nothing is needed here
            result.append(float(stringvalue))
        elif fieldtype in ('string','str'):
            result.append(stringvalue)
        else:
            raise RuntimeError('unsupported type '+fieldtype)
    return result
def fieldtotal(self,fieldname,fieldtype):
    '''
    given the input result field name and type, return the total

    input : fieldname, a column name that must appear in the result header
            fieldtype, one of 'int', 'unsigned int', 'float'
    output: the sum of the column over all stored result rows (0 if there are none)
    raises: the header lookup error (re-raised unchanged) if the field is not
            in the header; RuntimeError if a value does not match the
            requested type or if fieldtype cannot be summed
    '''
    try:
        fieldidx = self.__inputResultHeader.index(fieldname)
    except Exception:
        print('field ',fieldname,' not found')
        raise
    result = 0
    for r in self.__inputResult:
        stringvalue = r[fieldidx]
        if fieldtype in ('int','unsigned int'):
            if not CommonUtil.is_intstr(stringvalue):
                print('field ',fieldname,' is not integer type')
                # a bare raise has nothing to re-raise here; raise explicitly,
                # matching what fieldvalues does for the same condition
                raise RuntimeError('field')
            result = int(result)+int(stringvalue)
        elif fieldtype in ('float',):
            if not CommonUtil.is_floatstr(stringvalue):
                print('field ',fieldname,' is not float type')
                raise RuntimeError('field')
            result = float(result)+float(stringvalue)
        else:
            # the exception class was misspelled 'RunTimeError', which raised NameError
            raise RuntimeError('cannot sum types other than int ,float')
    return result
# Delegates to runs() of the object stored in self.__inputSelectionFileparsingResult.
# NOTE(review): original line 174 is missing from this listing, so the condition
# under which None is returned instead is not visible - presumably a check that
# the parsing result exists; confirm against the real file.
173  def runs(self):
175  return None
176  return self.__inputSelectionFileparsingResult.runs()
# Delegates to runsandls() of the object stored in self.__inputSelectionFileparsingResult.
# NOTE(review): original line 178 is missing from this listing, so the condition
# under which None is returned instead is not visible - presumably a check that
# the parsing result exists; confirm against the real file.
177  def runsandls(self):
179  return None
180  return self.__inputSelectionFileparsingResult.runsandls()
# Delegates to runsandlsStr() of the object stored in self.__inputSelectionFileparsingResult.
# NOTE(review): original line 182 is missing from this listing, so the condition
# under which None is returned instead is not visible - presumably a check that
# the parsing result exists; confirm against the real file.
181  def runsandlsStr(self):
183  return None
184  return self.__inputSelectionFileparsingResult.runsandlsStr()
185 
if __name__ == '__main__':
    # Manual smoke test: parse three overview CSV files plus a selection file
    # (all expected relative to the current directory) and print what the
    # parser extracted from them.
    result = {}
    filename = '163664-v2-overview.csv+163665-v2-overview.csv+163668-v2-overview.csv+../json_DCSONLY.txt'
    parser = inputFilesetParser(filename)

    print('selection file ', parser.selectionfilename())
    print('old result files ', parser.resultfiles())
    #print parser.runs()
    #print parser.runsandls()
    print('do I only need to merge the results? ', parser.mergeResultOnly())

    resultheader = parser.resultHeader()
    print(resultheader)
    print(parser.runsWithresult())
    print('selected runs with result ', parser.selectedRunsWithresult())
    print('selected runs without result ', parser.selectedRunsWithoutresult())
    #result=parser.resultInput()

    alreadyprocessedRuns = parser.fieldvalues('Run', 'int')
    print('runs already have results ', alreadyprocessedRuns)
    print('total delivered ', parser.fieldtotal('Delivered(/ub)', 'float'))
    print('total recorded ', parser.fieldtotal('Recorded(/ub)', 'float'))
    print('result header ', parser.resultheader())
    print('result lines ', parser.resultlines())

    # Disabled follow-up checks, kept for reference:
    #newrunsandls={}
    #for run,cmslslist in parser.runsandls().items():
    #    if run in alreadyprocessedRuns:
    #        continue
    #    else:
    #        newrunsandls[run]=cmslslist
    #print 'runs and ls still need to be processed', newrunsandls
    #filename='../test/lumi_900_output.json'
    #p2=inputFilesetParser(filename)
    #print 'result 2: ',p2.runs()
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def is_intstr(s)
Definition: CommonUtil.py:121
def fieldvalues(self, fieldname, fieldtype)
def fieldtotal(self, fieldname, fieldtype)
def is_floatstr(s)
Definition: CommonUtil.py:129
#define str(s)