CMS 3D CMS Logo

inputFilesetParser.py
Go to the documentation of this file.
import os,csv
from RecoLuminosity.LumiDB import csvSelectionParser,selectionParser,CommonUtil
def filehasHeader(f):
    '''Tell whether the first line of an open CSV file is a header line.

    input:  f, an open file-like object positioned at the line to inspect
    output: True if the first comma-separated field of that line is 'run'
            (case-insensitive, surrounding whitespace ignored), else False

    The call consumes one line from f; the caller must rewind (f.seek(0))
    before parsing the file from the start.
    '''
    firstfield=f.readline().split(',')[0]
    # strip so that a one-column header ("Run\n") or a padded one (" Run")
    # is still recognised
    return firstfield.strip().lower()=='run'
10 
class inputFilesetParser(object):
    '''Parse a '+'-separated input file set.

    The input string has the form 'result1.csv+result2.csv+...+selectionfile'.
    Every item before the last '+' is a CSV result file; the last item is the
    selection file name and may be empty (meaning: only merge result files).
    '''
    def __init__(self,inputfilename):
        '''
        input: inputfilename, the '+'-separated file set string
        Reads the selection file (if any) and loads all rows of the result files.
        '''
        filelist=inputfilename.split('+')
        self.__inputresultfiles=filelist[0:-1]
        self.__inputselectionfile=filelist[-1]
        self.__inputResultHeader=[]
        self.__inputResult=[]
        self.__inputSelectionFileparsingResult=None
        if len(self.__inputselectionfile)!=0:
            basename,extension=os.path.splitext(self.__inputselectionfile)
            if extension=='.csv':#if file ends with .csv,use csv parser,else parse as json file
                # NOTE(review): this call was missing from the extracted listing and is
                # reconstructed from the imports -- confirm the constructor name/argument
                self.__inputSelectionFileparsingResult=csvSelectionParser.csvSelectionParser(self.__inputselectionfile)
            else:
                # 'with' guarantees the selection file is closed (it was left open before)
                with open(self.__inputselectionfile,'r') as selectf:
                    inputfilecontent=selectf.read()
                # NOTE(review): reconstructed like the csv branch above -- confirm
                self.__inputSelectionFileparsingResult=selectionParser.selectionParser(inputfilecontent)
        for f in self.__inputresultfiles:
            with open(f) as ifile:
                hasHeader=filehasHeader(ifile)
                #csv.Sniffer().has_header doesn't work well, replaced with custom check
                ifile.seek(0) # filehasHeader consumed the first line
                for irow,row in enumerate(csv.reader(ifile,delimiter=',')):
                    if hasHeader and irow==0:
                        # the header of the last file that has one wins
                        self.__inputResultHeader=row
                    else:
                        self.__inputResult.append(row)

    def resultheader(self):
        '''
        output [headerfields] (same as resultHeader)
        '''
        return self.__inputResultHeader

    def resultlines(self):
        '''
        output [valuefields] (same as resultInput)
        '''
        return self.__inputResult

    def runsWithresult(self):
        '''
        output: [run,run,...]
        Run numbers found in the first column of the result files, each listed
        once, in order of first appearance. Rows whose first field is not an
        integer string (e.g. headers) and blank rows are ignored.
        '''
        seen=set()
        result=[]
        for f in self.__inputresultfiles:
            with open(f) as ifile:
                for row in csv.reader(ifile,delimiter=','):
                    if not row: # blank line in the csv file
                        continue
                    field0=str(row[0]).strip()
                    if not CommonUtil.is_intstr(field0):
                        continue
                    runnumber=int(field0)
                    if runnumber not in seen:
                        seen.add(runnumber)
                        result.append(runnumber)
        return result

    def __selectedruns(self,withresult):
        '''
        selected runs that do (withresult=True) or do not (withresult=False)
        appear in the result files; [] if there is no selection file
        '''
        if len(self.__inputselectionfile)==0:#actually no selected
            return []
        runswithresult=set(self.runsWithresult())
        selectedruns=self.runs() or [] # runs() is None when nothing was parsed
        return [r for r in selectedruns if (r in runswithresult)==withresult]

    def selectedRunsWithresult(self):
        '''
        output: [run,run,...]
        selected runs that already have an entry in the result files
        '''
        return self.__selectedruns(True)

    def selectedRunsWithoutresult(self):
        '''
        output: [run,run,...]
        selected runs that have no entry in the result files
        '''
        return self.__selectedruns(False)

    def selectionfilename(self):
        '''return the input selection file name
        '''
        return self.__inputselectionfile

    def mergeResultOnly(self):
        '''if empty input selection filename give, I assume you only need to merge pieces of output result files into one
        '''
        return len(self.__inputselectionfile)==0

    def resultfiles(self):
        '''
        output [resultfilename,...]
        '''
        return self.__inputresultfiles

    def resultHeader(self):
        '''
        output [headerfields]
        '''
        return self.__inputResultHeader

    def resultInput(self):
        '''
        output [valuefields]
        '''
        return self.__inputResult

    def __fieldindex(self,fieldname):
        '''
        column index of fieldname in the result header
        raises ValueError if the header has no such field
        '''
        try:
            return self.__inputResultHeader.index(fieldname)
        except ValueError:
            raise ValueError('field %s not found'%fieldname)

    def fieldvalues(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the list of values
        input:  fieldname, a name present in the result header
                fieldtype, one of 'int','unsigned int','float','string','str'
        output: [value,...] one converted value per result row
        raises: ValueError if the field is not in the header, if a value
                cannot be converted, or if fieldtype is not supported
                (the type is only checked when there is at least one row)
        '''
        fieldidx=self.__fieldindex(fieldname)
        result=[]
        for r in self.__inputResult:
            stringvalue=r[fieldidx]
            if fieldtype in ('int','unsigned int'):
                if not CommonUtil.is_intstr(stringvalue):
                    raise ValueError('field %s is not integer type'%fieldname)
                result.append(int(stringvalue))
            elif fieldtype=='float':
                if not CommonUtil.is_floatstr(stringvalue):
                    raise ValueError('field %s is not float type'%fieldname)
                result.append(float(stringvalue))
            elif fieldtype in ('string','str'):
                result.append(stringvalue)
            else:
                raise ValueError('unsupported type %s'%fieldtype)
        return result

    def fieldtotal(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the total
        input:  fieldname, a name present in the result header
                fieldtype, one of 'int','unsigned int','float'
        output: sum of the field over all result rows; 0 if there are no rows
        raises: ValueError if the field is not in the header, if a value
                cannot be converted, or if fieldtype cannot be summed
                (the type is only checked when there is at least one row)
        '''
        fieldidx=self.__fieldindex(fieldname)
        result=0
        for r in self.__inputResult:
            stringvalue=r[fieldidx]
            if fieldtype in ('int','unsigned int'):
                if not CommonUtil.is_intstr(stringvalue):
                    raise ValueError('field %s is not integer type'%fieldname)
                result=int(result)+int(stringvalue)
            elif fieldtype=='float':
                if not CommonUtil.is_floatstr(stringvalue):
                    raise ValueError('field %s is not float type'%fieldname)
                result=float(result)+float(stringvalue)
            else:
                raise ValueError('cannot sum types other than int, float')
        return result

    def runs(self):
        '''
        output: what the selection parser's runs() returns, None if no selection was parsed
        '''
        # NOTE(review): the guard line was missing from the extracted listing;
        # reconstructed as a check on the parsing result -- confirm
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runs()

    def runsandls(self):
        '''
        output: what the selection parser's runsandls() returns, None if no selection was parsed
        '''
        # NOTE(review): guard reconstructed, see runs() -- confirm
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandls()

    def runsandlsStr(self):
        '''
        output: what the selection parser's runsandlsStr() returns, None if no selection was parsed
        '''
        # NOTE(review): guard reconstructed, see runs() -- confirm
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandlsStr()
184 
if __name__ == '__main__':
    # Manual smoke test: parse three overview csv result files plus one json
    # selection file (paths relative to the current directory) and dump what
    # the parser extracted from them.
    filename='163664-v2-overview.csv+163665-v2-overview.csv+163668-v2-overview.csv+../json_DCSONLY.txt'
    p=inputFilesetParser(filename)
    # single-argument print(...) behaves the same on python 2 and 3; the double
    # space reproduces the separator the old "print a,b" statements emitted
    print('selection file  %s'%(p.selectionfilename(),))
    print('old result files  %s'%(p.resultfiles(),))
    #print p.runs()
    #print p.runsandls()
    print('do I only need to merge the results?  %s'%(p.mergeResultOnly(),))
    resultheader=p.resultHeader()
    print(resultheader)
    print(p.runsWithresult())
    print('selected runs with result  %s'%(p.selectedRunsWithresult(),))
    print('selected runs without result  %s'%(p.selectedRunsWithoutresult(),))
    #result=p.resultInput()
    alreadyprocessedRuns=p.fieldvalues('Run','int')
    print('runs already have results  %s'%(alreadyprocessedRuns,))
    print('total delivered  %s'%(p.fieldtotal('Delivered(/ub)','float'),))
    print('total recorded  %s'%(p.fieldtotal('Recorded(/ub)','float'),))
    print('result header  %s'%(p.resultheader(),))
    print('result lines  %s'%(p.resultlines(),))
    #newrunsandls={}
    #for run,cmslslist in p.runsandls().items():
    #    if run in alreadyprocessedRuns:
    #        continue
    #    else:
    #        newrunsandls[run]=cmslslist
    #print 'runs and ls still need to be processed', newrunsandls
    #filename='../test/lumi_900_output.json'
    #p2=inputFilesetParser(filename)
    #print 'result 2: ',p2.runs()
def is_intstr(s)
Definition: CommonUtil.py:120
def fieldvalues(self, fieldname, fieldtype)
def fieldtotal(self, fieldname, fieldtype)
def is_floatstr(s)
Definition: CommonUtil.py:128