CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_10_patch1/src/RecoLuminosity/LumiDB/python/inputFilesetParser.py

Go to the documentation of this file.
00001 import os,csv
00002 from RecoLuminosity.LumiDB import csvSelectionParser,selectionParser,CommonUtil
def filehasHeader(f):
    '''
    Tell whether the first line of an open csv file is a header line.

    input:  f, a readable text file object
    output: True if the first comma-separated field of the first line is
            'run' (case-insensitive, surrounding whitespace ignored),
            False otherwise (including for an empty file)

    Note: this consumes the first line of f; callers that want to parse
    the file afterwards must rewind it themselves (e.g. f.seek(0)).
    '''
    firstfield=f.readline().split(',')[0]
    # strip so that a single-column header ("Run\n") or a padded header
    # (" Run ,...") is still recognised
    return firstfield.strip().lower()=='run'
00010     
class inputFilesetParser(object):
    '''
    Parser for an input specification of the form
        resultfile1+resultfile2+...+selectionfile
    The string is split on '+': the last piece is the selection file name
    (it may be empty), every piece before it is the name of a csv result file.
    All result files are read at construction time; their rows are kept in
    memory as lists of strings.
    '''
    def __init__(self,inputfilename):
        '''
        input: inputfilename, '+'-separated file names, selection file last.
        A selection file with extension .csv is handed (by name) to
        csvSelectionParser; any other selection file is read and its content
        handed to selectionParser.
        '''
        filelist=inputfilename.split('+')
        self.__inputresultfiles=filelist[0:-1]
        self.__inputselectionfile=filelist[-1]
        self.__inputResultHeader=[]
        self.__inputResult=[]
        self.__inputSelectionFileparsingResult=None
        if len(self.__inputselectionfile)!=0:
            basename,extension=os.path.splitext(self.__inputselectionfile)
            if extension=='.csv':#if file ends with .csv,use csv parser,else parse as json file
                self.__inputSelectionFileparsingResult=csvSelectionParser.csvSelectionParser(self.__inputselectionfile)
            else:
                # 'with' guarantees the selection file is closed again
                with open(self.__inputselectionfile,'r') as selectf:
                    inputfilecontent=selectf.read()
                self.__inputSelectionFileparsingResult=selectionParser.selectionParser(inputfilecontent)
        for f in self.__inputresultfiles:
            with open(f) as ifile:
                hasHeader=filehasHeader(ifile)
                #hasHeader=csv.Sniffer().has_header(ifile.read(1024)) #sniffer doesn't work well , replace with custom
                ifile.seek(0) # header detection consumed the first line
                for irow,row in enumerate(csv.reader(ifile,delimiter=',')):
                    if hasHeader and irow==0:
                        # the header of the last file that has one wins
                        self.__inputResultHeader=row
                    else:
                        self.__inputResult.append(row)
    def resultheader(self):
        '''
        output [headerfields] (same as resultHeader)
        '''
        return self.__inputResultHeader
    def resultlines(self):
        '''
        output [valuefields] (same as resultInput)
        '''
        return self.__inputResult
    def runsWithresult(self):
        '''
        Re-read the result files and collect the run numbers found in the
        first column. Rows whose first field is not an integer string
        (e.g. header lines) and empty rows are skipped.
        output: [run,run,...] without duplicates, in order of first appearance
        '''
        runs=[]
        seen=set()
        for f in self.__inputresultfiles:
            with open(f) as ifile:
                for row in csv.reader(ifile,delimiter=','):
                    if not row: # blank line in the csv file
                        continue
                    field0=str(row[0]).strip()
                    if not CommonUtil.is_intstr(field0):
                        continue
                    runnumber=int(field0)
                    if runnumber not in seen:
                        seen.add(runnumber)
                        runs.append(runnumber)
        return runs
    def selectedRunsWithresult(self):
        '''
        Selected runs that already appear in the result files.
        output: [run,run,...] ; empty if no selection file was given
        '''
        if len(self.__inputselectionfile)==0:#actually no selected
            return []
        runswithresult=set(self.runsWithresult())
        # runs() may return None when nothing was parsed
        return [r for r in (self.runs() or []) if r in runswithresult]
    def selectedRunsWithoutresult(self):
        '''
        Selected runs that do not appear in any result file.
        output: [run,run,...] ; empty if no selection file was given
        '''
        if len(self.__inputselectionfile)==0:#actually no selected
            return []
        runswithresult=set(self.runsWithresult())
        # runs() may return None when nothing was parsed
        return [r for r in (self.runs() or []) if r not in runswithresult]
    def selectionfilename(self):
        '''return the input selection file name
        '''
        return self.__inputselectionfile
    def mergeResultOnly(self):
        '''if empty input selection filename give, I assume you only need to merge pieces of output result files into one 
        '''
        return len(self.__inputselectionfile)==0
    def resultfiles(self):
        '''
        output [result file names]
        '''
        return self.__inputresultfiles
    def resultHeader(self):
        '''
        output [headerfields]
        '''
        return self.__inputResultHeader
    def resultInput(self):
        '''
        output [valuefields]
        '''
        return self.__inputResult
    def fieldvalues(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the list of values
        input:  fieldname, a name present in the result header
                fieldtype, one of 'int','unsigned int','float','string','str'
        output: [value,...] converted to the requested type
        raises: ValueError if the field is not in the header, if a value
                cannot be converted, or if the type is not supported
        '''
        try:
            fieldidx=self.__inputResultHeader.index(fieldname)
        except ValueError:
            print('field  %s  not found'%(fieldname,))
            raise
        result=[]
        for r in self.__inputResult:
            stringvalue=r[fieldidx]
            if fieldtype in ['int','unsigned int']:
                if not CommonUtil.is_intstr(stringvalue):
                    msg='field  %s  is not integer type'%(fieldname,)
                    print(msg)
                    raise ValueError(msg)
                result.append(int(stringvalue))
            elif fieldtype in ['float']:
                if not CommonUtil.is_floatstr(stringvalue):
                    msg='field  %s  is not float type'%(fieldname,)
                    print(msg)
                    raise ValueError(msg)
                result.append(float(stringvalue))
            elif fieldtype in ['string','str']:
                result.append(stringvalue)
            else:
                raise ValueError('unsupported type %s'%(fieldtype,))
        return result
    def fieldtotal(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the total
        input:  fieldname, a name present in the result header
                fieldtype, one of 'int','unsigned int','float'
        output: sum of the field over all result rows (0 if there are no rows)
        raises: ValueError if the field is not in the header, if a value
                cannot be converted, or if the type cannot be summed
        '''
        try:
            fieldidx=self.__inputResultHeader.index(fieldname)
        except ValueError:
            print('field  %s  not found'%(fieldname,))
            raise
        result=0
        for r in self.__inputResult:
            stringvalue=r[fieldidx]
            if fieldtype in ['int','unsigned int']:
                if not CommonUtil.is_intstr(stringvalue):
                    msg='field  %s  is not integer type'%(fieldname,)
                    print(msg)
                    raise ValueError(msg)
                result=int(result)+int(stringvalue)
            elif fieldtype in ['float']:
                if not CommonUtil.is_floatstr(stringvalue):
                    msg='field  %s  is not float type'%(fieldname,)
                    print(msg)
                    raise ValueError(msg)
                result=float(result)+float(stringvalue)
            else:
                raise ValueError('cannot sum types other than int ,float')
        return result
    def runs(self):
        '''
        output: runs() of the parsed selection, None if there is none
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runs()
    def runsandls(self):
        '''
        output: runsandls() of the parsed selection, None if there is none
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandls()
    def runsandlsStr(self):
        '''
        output: runsandlsStr() of the parsed selection, None if there is none
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandlsStr()
00184     
00185 if __name__ == '__main__':
00186     result={}
00187     filename='163664-v2-overview.csv+163665-v2-overview.csv+163668-v2-overview.csv+../json_DCSONLY.txt'
00188     p=inputFilesetParser(filename)
00189     print 'selection file ',p.selectionfilename()
00190     print 'old result files ',p.resultfiles()
00191     #print p.runs()
00192     #print p.runsandls()
00193     print 'do I only need to merge the results? ',p.mergeResultOnly()
00194     resultheader=p.resultHeader()
00195     print resultheader
00196     print p.runsWithresult()
00197     print 'selected runs with result ',p.selectedRunsWithresult()
00198     print 'selected runs without result ',p.selectedRunsWithoutresult()
00199     #result=p.resultInput()
00200     alreadyprocessedRuns=p.fieldvalues('Run','int')
00201     print 'runs already have results ', alreadyprocessedRuns
00202     print 'total delivered ',p.fieldtotal('Delivered(/ub)','float')
00203     print 'total recorded ',p.fieldtotal('Recorded(/ub)','float')
00204     print 'result header ',p.resultheader()
00205     print 'result lines ',p.resultlines()
00206     #newrunsandls={}
00207     #for run,cmslslist in p.runsandls().items():
00208     #    if run in alreadyprocessedRuns:
00209     #        continue
00210     #    else:
00211     #        newrunsandls[run]=cmslslist
00212     #print 'runs and ls still need to be processed', newrunsandls
00213     #filename='../test/lumi_900_output.json'
00214     #p2=inputFilesetParser(filename)
00215     #print 'result 2: ',p2.runs()