CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch9/src/RecoLuminosity/LumiDB/python/inputFilesetParser.py

Go to the documentation of this file.
00001 import os,csv
00002 from RecoLuminosity.LumiDB import csvSelectionParser,selectionParser,CommonUtil
class inputFilesetParser(object):
    '''Parse an input file set given as a single '+'-separated string.

    The string has the form 'result1.csv+result2.csv+...+selectionfile'.
    Every name except the last is a csv result file; the last name is the
    selection file and may be empty (e.g. 'a.csv+b.csv+'), which means that
    only result files are supplied.

    Rows of all result files are concatenated in the order given. If more
    than one result file has a header row, the header of the last such file
    is the one kept.
    '''
    def __init__(self,inputfilename):
        '''
        input: '+'-separated file names, the last one being the selection file
        Reads the selection file (if any) and all result files immediately.
        '''
        filelist=inputfilename.split('+')
        self.__inputresultfiles=filelist[0:-1]
        self.__inputselectionfile=filelist[-1]
        self.__inputResultHeader=[]
        self.__inputResult=[]
        self.__inputSelectionFileparsingResult=None
        if self.__inputselectionfile:
            self.__parseSelectionFile()
        for f in self.__inputresultfiles:
            self.__readResultFile(f)
    def __parseSelectionFile(self):
        '''parse the selection file: csv parser for a .csv extension, otherwise json parser
        '''
        basename,extension=os.path.splitext(self.__inputselectionfile)
        if extension=='.csv':
            self.__inputSelectionFileparsingResult=csvSelectionParser.csvSelectionParser(self.__inputselectionfile)
            return
        # the with block guarantees the handle is released even if read() fails
        with open(self.__inputselectionfile,'r') as selectf:
            inputfilecontent=selectf.read()
        self.__inputSelectionFileparsingResult=selectionParser.selectionParser(inputfilecontent)
    def __readResultFile(self,filename):
        '''append the rows of one csv result file; a detected header row is stored separately
        '''
        ifile=open(filename)
        try:
            # header detection is a csv.Sniffer heuristic on the first 1024 characters
            hasHeader=csv.Sniffer().has_header(ifile.read(1024))
            ifile.seek(0)
            for irow,row in enumerate(csv.reader(ifile,delimiter=',')):
                if hasHeader and irow==0:
                    self.__inputResultHeader=row
                else:
                    self.__inputResult.append(row)
        finally:
            ifile.close()
    def __fieldindex(self,fieldname):
        '''return the column index of fieldname in the result header, raise ValueError if absent
        '''
        try:
            return self.__inputResultHeader.index(fieldname)
        except ValueError:
            raise ValueError('field %s not found'%fieldname)
    def __tonumber(self,fieldname,fieldtype,stringvalue):
        '''
        convert stringvalue to int ('int','unsigned int') or float ('float')
        return None if fieldtype is not a numeric type
        raise ValueError if the string fails the CommonUtil format check
        '''
        if fieldtype in ['int','unsigned int']:
            if not CommonUtil.is_intstr(stringvalue):
                raise ValueError('field %s is not integer type'%fieldname)
            return int(stringvalue)
        if fieldtype in ['float']:
            if not CommonUtil.is_floatstr(stringvalue):
                raise ValueError('field %s is not float type'%fieldname)
            return float(stringvalue)
        return None
    def selectionfilename(self):
        '''return the input selection file name
        '''
        return self.__inputselectionfile
    def mergeResultOnly(self):
        '''if an empty input selection filename is given, assume only merging of pieces of output result files is needed
        '''
        return not self.__inputselectionfile
    def resultfiles(self):
        '''return the list of input result file names
        '''
        return self.__inputresultfiles
    def resultHeader(self):
        '''
        output [headerfields]
        '''
        return self.__inputResultHeader
    def resultInput(self):
        '''
        output [valuefields]
        '''
        return self.__inputResult
    def fieldvalues(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the list of values
        fieldtype: 'int','unsigned int','float','string' or 'str'
        raise ValueError if the field is missing, a value does not match
        the requested type, or the type is unsupported
        '''
        fieldidx=self.__fieldindex(fieldname)
        result=[]
        for r in self.__inputResult:
            stringvalue=r[fieldidx]
            if fieldtype in ['string','str']:
                result.append(stringvalue)
                continue
            value=self.__tonumber(fieldname,fieldtype,stringvalue)
            if value is None:
                raise ValueError('unsupported type %s'%fieldtype)
            result.append(value)
        return result
    def fieldtotal(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the total
        fieldtype: 'int','unsigned int' or 'float'
        returns 0 if there are no result rows
        raise ValueError if the field is missing, a value does not match
        the requested type, or the type cannot be summed
        '''
        fieldidx=self.__fieldindex(fieldname)
        result=0
        for r in self.__inputResult:
            value=self.__tonumber(fieldname,fieldtype,r[fieldidx])
            if value is None:
                raise ValueError('cannot sum types other than int ,float')
            result=result+value
        return result
    def runs(self):
        '''return runs() of the selection parsing result, None if there is no selection
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runs()
    def runsandls(self):
        '''return runsandls() of the selection parsing result, None if there is no selection
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandls()
    def runsandlsStr(self):
        '''return runsandlsStr() of the selection parsing result, None if there is no selection
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandlsStr()
00128     
00129 if __name__ == '__main__':
00130     result={}
00131     filename='../test/overview.csv+../test/overview-140381.csv+../test/Cert_132440-139103_7TeV_StreamExpress_Collisions10_JSON.txt'
00132     #filename='../test/overview.csv+../test/overview-140381.csv+'
00133     p=inputFilesetParser(filename)
00134     print p.selectionfilename()
00135     print p.resultfiles()
00136     #print p.runs()
00137     #print p.runsandls()
00138     print 'do I only need to merge the results? ',p.mergeResultOnly()
00139     resultheader=p.resultHeader()
00140     result=p.resultInput()
00141     alreadyprocessedRuns=p.fieldvalues('run','int')
00142     print 'runs already have results ', alreadyprocessedRuns
00143     print 'total delivered ',p.fieldtotal('delivered','float')
00144     print 'total recorded ',p.fieldtotal('recorded','float')
00145     newrunsandls={}
00146     for run,cmslslist in p.runsandls().items():
00147         if run in alreadyprocessedRuns:
00148             continue
00149         else:
00150             newrunsandls[run]=cmslslist
00151     print 'runs and ls still need to be processed', newrunsandls
00152     filename='../test/lumi_900_output.json'
00153     p2=inputFilesetParser(filename)
00154     print 'result 2: ',p2.runs()