00001 import os,csv
00002 from RecoLuminosity.LumiDB import csvSelectionParser,selectionParser,CommonUtil
def filehasHeader(f):
    '''
    Tell whether the first line of an open csv file looks like a header.

    input:  f, a file-like object positioned at the start of the data.
    output: True if the first comma-separated field of the first line is
            "run" (case-insensitive, surrounding whitespace ignored),
            False otherwise (including for an empty file).

    Note: this consumes the first line of f; callers that want to parse
    the whole file afterwards must rewind it themselves (e.g. f.seek(0)).
    '''
    line = f.readline()
    # str.split always yields at least one element, so [0] is safe even
    # for an empty line.
    firstfield = line.split(',')[0]
    # Strip before comparing: for a single-column file the first field
    # still carries the trailing newline ("Run\n"), and hand-edited files
    # may pad the field with blanks.
    return firstfield.strip().lower() == 'run'
00010
class inputFilesetParser(object):
    '''
    Parser for an input specification of the form
        resultfile1+resultfile2+...+selectionfile
    i.e. file names joined by "+". Every name but the last is treated as a
    csv file holding previously produced results; the last name is the
    run/ls selection file and may be empty ("a.csv+b.csv+"), in which case
    only the result files are read.
    '''
    # Type names accepted by fieldvalues()/fieldtotal().
    _INT_TYPES = ('int', 'unsigned int')
    _FLOAT_TYPES = ('float',)
    _STR_TYPES = ('string', 'str')

    def __init__(self, inputfilename):
        '''
        input: inputfilename, the "+"-joined file specification.

        The selection file is handed to csvSelectionParser (by file name)
        when its extension is ".csv", otherwise its content is read and
        handed to selectionParser. All result files are read eagerly: the
        first row of a file is stored as the header when filehasHeader()
        recognises it, every other row is appended to the result lines.
        If several files carry a header, the last one read wins.
        '''
        filelist = inputfilename.split('+')
        self.__inputresultfiles = filelist[0:-1]
        self.__inputselectionfile = filelist[-1]
        self.__inputResultHeader = []
        self.__inputResult = []
        self.__inputSelectionFileparsingResult = None
        if self.__inputselectionfile:
            extension = os.path.splitext(self.__inputselectionfile)[1]
            if extension == '.csv':
                self.__inputSelectionFileparsingResult = csvSelectionParser.csvSelectionParser(self.__inputselectionfile)
            else:
                # "with" guarantees the selection file is closed again.
                with open(self.__inputselectionfile, 'r') as selectf:
                    inputfilecontent = selectf.read()
                self.__inputSelectionFileparsingResult = selectionParser.selectionParser(inputfilecontent)
        for f in self.__inputresultfiles:
            with open(f) as ifile:
                hasHeader = filehasHeader(ifile)
                # filehasHeader consumed the first line; rewind so the csv
                # reader sees the complete file.
                ifile.seek(0)
                for irow, row in enumerate(csv.reader(ifile, delimiter=',')):
                    if hasHeader and irow == 0:
                        self.__inputResultHeader = row
                    else:
                        self.__inputResult.append(row)

    def resultheader(self):
        '''
        output: [headerfields] (same as resultHeader)
        '''
        return self.__inputResultHeader

    def resultlines(self):
        '''
        output: [[valuefields],...] (same as resultInput)
        '''
        return self.__inputResult

    def runsWithresult(self):
        '''
        Re-read the result files and collect the run numbers found in the
        first column. Rows whose first field is not an integer string
        (e.g. the header) and blank rows are skipped.
        output: [run,run,...] without duplicates, in order of first
                appearance
        '''
        seen = set()
        runlist = []
        for f in self.__inputresultfiles:
            with open(f) as resultfile:
                for row in csv.reader(resultfile, delimiter=','):
                    if not row:
                        # csv.reader yields [] for blank lines.
                        continue
                    field0 = str(row[0]).strip()
                    if not CommonUtil.is_intstr(field0):
                        continue
                    runnumber = int(field0)
                    if runnumber not in seen:
                        seen.add(runnumber)
                        runlist.append(runnumber)
        return runlist

    def selectedRunsWithresult(self):
        '''
        Selected runs that already appear in the result files.
        output: [run,run,...], empty if no selection file was given
        '''
        if not self.__inputselectionfile:
            return []
        # Build the lookup once instead of scanning a list per run.
        runswithresult = set(self.runsWithresult())
        return [r for r in self.runs() if r in runswithresult]

    def selectedRunsWithoutresult(self):
        '''
        Selected runs that do not appear in the result files.
        output: [run,run,...], empty if no selection file was given
        '''
        if not self.__inputselectionfile:
            return []
        runswithresult = set(self.runsWithresult())
        return [r for r in self.runs() if r not in runswithresult]

    def selectionfilename(self):
        '''return the input selection file name
        '''
        return self.__inputselectionfile

    def mergeResultOnly(self):
        '''if an empty input selection filename is given, I assume you only need to merge pieces of output result files into one
        '''
        return len(self.__inputselectionfile) == 0

    def resultfiles(self):
        '''
        output: [resultfilename,...]
        '''
        return self.__inputresultfiles

    def resultHeader(self):
        '''
        output [headerfields]
        '''
        return self.__inputResultHeader

    def resultInput(self):
        '''
        output [valuefields]
        '''
        return self.__inputResult

    def _fieldindex(self, fieldname):
        '''
        Return the column index of fieldname in the result header.
        raises ValueError if the header has no such field.
        '''
        try:
            return self.__inputResultHeader.index(fieldname)
        except ValueError:
            raise ValueError('field %s not found' % (fieldname,))

    def _convert(self, fieldname, fieldtype, stringvalue):
        '''
        Convert one cell to the python type named by fieldtype.
        raises ValueError if the cell does not parse as that type or the
        type name is not supported.
        '''
        if fieldtype in self._INT_TYPES:
            if not CommonUtil.is_intstr(stringvalue):
                raise ValueError('field %s is not integer type' % (fieldname,))
            return int(stringvalue)
        if fieldtype in self._FLOAT_TYPES:
            if not CommonUtil.is_floatstr(stringvalue):
                raise ValueError('field %s is not float type' % (fieldname,))
            return float(stringvalue)
        if fieldtype in self._STR_TYPES:
            return stringvalue
        raise ValueError('unsupported type %s' % (fieldtype,))

    def fieldvalues(self, fieldname, fieldtype):
        '''
        given the input result field name and type, return the list of values
        input:  fieldname, a name present in the result header
                fieldtype, one of 'int','unsigned int','float','string','str'
        output: [value,...], one per result line, converted to fieldtype
        raises: ValueError for an unknown field, an unsupported type or a
                cell that cannot be converted
        '''
        fieldidx = self._fieldindex(fieldname)
        return [self._convert(fieldname, fieldtype, r[fieldidx])
                for r in self.__inputResult]

    def fieldtotal(self, fieldname, fieldtype):
        '''
        given the input result field name and type, return the total
        input:  fieldname, a name present in the result header
                fieldtype, one of 'int','unsigned int','float'
        output: sum over all result lines; 0 when there are no lines
        raises: ValueError for an unknown field, a non-summable type or a
                cell that cannot be converted
        '''
        fieldidx = self._fieldindex(fieldname)
        # The type is only judged when there is something to sum, so an
        # empty result set yields 0 whatever type was asked for.
        if self.__inputResult and fieldtype not in self._INT_TYPES + self._FLOAT_TYPES:
            raise ValueError('cannot sum types other than int ,float')
        total = 0
        for r in self.__inputResult:
            total = total + self._convert(fieldname, fieldtype, r[fieldidx])
        return total

    def runs(self):
        '''
        output: whatever the selection parser's runs() returns, or None
                when there is no (non-empty) selection parsing result
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runs()

    def runsandls(self):
        '''
        output: whatever the selection parser's runsandls() returns, or
                None when there is no (non-empty) selection parsing result
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandls()

    def runsandlsStr(self):
        '''
        output: whatever the selection parser's runsandlsStr() returns, or
                None when there is no (non-empty) selection parsing result
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandlsStr()
00184
if __name__ == '__main__':
    # Manual smoke test: parse three overview csv files plus a json
    # selection file (paths relative to the current directory) and dump
    # what the parser extracted from them.
    def _show(label, value):
        # One space between label and value, like a two-item print.
        print(' '.join([label, str(value)]))

    result = {}
    filename = '163664-v2-overview.csv+163665-v2-overview.csv+163668-v2-overview.csv+../json_DCSONLY.txt'
    p = inputFilesetParser(filename)
    _show('selection file ', p.selectionfilename())
    _show('old result files ', p.resultfiles())

    _show('do I only need to merge the results? ', p.mergeResultOnly())
    resultheader = p.resultHeader()
    print(resultheader)
    print(p.runsWithresult())
    _show('selected runs with result ', p.selectedRunsWithresult())
    _show('selected runs without result ', p.selectedRunsWithoutresult())

    # Column-wise access to the previously computed results.
    alreadyprocessedRuns = p.fieldvalues('Run', 'int')
    _show('runs already have results ', alreadyprocessedRuns)
    _show('total delivered ', p.fieldtotal('Delivered(/ub)', 'float'))
    _show('total recorded ', p.fieldtotal('Recorded(/ub)', 'float'))
    _show('result header ', p.resultheader())
    _show('result lines ', p.resultlines())
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215