Go to the documentation of this file.00001 import os,csv
00002 from RecoLuminosity.LumiDB import csvSelectionParser,selectionParser,CommonUtil
class inputFilesetParser(object):
    '''
    Parse an input file set given as one '+'-separated string:

        resultfile1+resultfile2+...+selectionfile

    Every name except the last one is a csv result file. The last name is
    the selection file; it may be empty (trailing '+'), in which case only
    the result files are loaded.
    '''
    def __init__(self,inputfilename):
        '''
        input: '+'-separated file names, the last one being the selection file
        '''
        filelist=inputfilename.split('+')
        self.__inputresultfiles=filelist[0:-1]
        self.__inputselectionfile=filelist[-1]
        self.__inputResultHeader=[]
        self.__inputResult=[]
        self.__inputSelectionFileparsingResult=None
        if self.__inputselectionfile:
            self.__parseselectionfile()
        for resultfile in self.__inputresultfiles:
            self.__loadresultfile(resultfile)

    def __parseselectionfile(self):
        '''parse the selection file: csvSelectionParser for *.csv, selectionParser for anything else
        '''
        extension=os.path.splitext(self.__inputselectionfile)[1]
        if extension=='.csv':
            # the csv selection parser is handed the file name
            self.__inputSelectionFileparsingResult=csvSelectionParser.csvSelectionParser(self.__inputselectionfile)
        else:
            # the generic selection parser is handed the file content
            with open(self.__inputselectionfile,'r') as selectf:
                inputfilecontent=selectf.read()
            self.__inputSelectionFileparsingResult=selectionParser.selectionParser(inputfilecontent)

    def __loadresultfile(self,resultfile):
        '''append the rows of one csv result file to the accumulated result

        If the file is sniffed to have a header row, that row replaces the
        stored header instead of being appended as data.
        '''
        with open(resultfile) as ifile:
            sample=ifile.read(1024)
            if not sample:
                # zero-length file: nothing to load, and csv.Sniffer
                # raises csv.Error on an empty sample
                return
            hasHeader=csv.Sniffer().has_header(sample)
            ifile.seek(0)
            for irow,row in enumerate(csv.reader(ifile,delimiter=',')):
                if hasHeader and irow==0:
                    self.__inputResultHeader=row
                else:
                    self.__inputResult.append(row)

    def __fieldindex(self,fieldname):
        '''return the column index of fieldname in the result header

        raises ValueError if the header has no such field
        '''
        try:
            return self.__inputResultHeader.index(fieldname)
        except ValueError:
            raise ValueError('field %s not found'%fieldname)

    def __convert(self,fieldname,fieldtype,stringvalue):
        '''convert one csv cell to the requested type

        raises ValueError if the cell does not match a numeric fieldtype
        or if fieldtype is not supported
        '''
        if fieldtype in ['int','unsigned int']:
            if not CommonUtil.is_intstr(stringvalue):
                raise ValueError('field %s is not integer type'%fieldname)
            return int(stringvalue)
        if fieldtype in ['float']:
            if not CommonUtil.is_floatstr(stringvalue):
                raise ValueError('field %s is not float type'%fieldname)
            return float(stringvalue)
        if fieldtype in ['string','str']:
            return stringvalue
        raise ValueError('unsupported type %s'%fieldtype)

    def selectionfilename(self):
        '''return the input selection file name
        '''
        return self.__inputselectionfile

    def mergeResultOnly(self):
        '''if an empty input selection filename is given, I assume you only need to merge pieces of output result files into one
        '''
        return len(self.__inputselectionfile)==0

    def resultfiles(self):
        '''
        output [resultfilename]
        '''
        return self.__inputresultfiles

    def resultHeader(self):
        '''
        output [headerfields]
        '''
        return self.__inputResultHeader

    def resultInput(self):
        '''
        output [valuefields]
        '''
        return self.__inputResult

    def fieldvalues(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the list of values
        fieldtype is one of 'int','unsigned int','float','string','str'
        raises ValueError if the field is not in the header, if a value
        does not match the requested type, or if the type is unsupported
        '''
        fieldidx=self.__fieldindex(fieldname)
        # conversion is done per row, so an unsupported fieldtype is only
        # reported when there is at least one row to convert
        return [self.__convert(fieldname,fieldtype,r[fieldidx]) for r in self.__inputResult]

    def fieldtotal(self,fieldname,fieldtype):
        '''
        given the input result field name and type, return the total
        fieldtype is one of 'int','unsigned int','float'
        returns 0 if there are no result rows
        raises ValueError if the field is not in the header, if a value
        does not match the requested type, or if the type cannot be summed
        '''
        fieldidx=self.__fieldindex(fieldname)
        result=0
        for r in self.__inputResult:
            if fieldtype not in ['int','unsigned int','float']:
                raise ValueError('cannot sum types other than int ,float')
            result=result+self.__convert(fieldname,fieldtype,r[fieldidx])
        return result

    def runs(self):
        '''return runs() of the parsed selection, or None if there is no parsed selection
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runs()

    def runsandls(self):
        '''return runsandls() of the parsed selection, or None if there is no parsed selection
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandls()

    def runsandlsStr(self):
        '''return runsandlsStr() of the parsed selection, or None if there is no parsed selection
        '''
        if not self.__inputSelectionFileparsingResult:
            return None
        return self.__inputSelectionFileparsingResult.runsandlsStr()
00128
if __name__ == '__main__':
    # Manual smoke test: merge two existing result files with a selection
    # file and report which selected runs still have no result.
    # print() is always called with a single pre-formatted string so the
    # output text is the same under python 2 and python 3.
    filename='../test/overview.csv+../test/overview-140381.csv+../test/Cert_132440-139103_7TeV_StreamExpress_Collisions10_JSON.txt'

    p=inputFilesetParser(filename)
    print(p.selectionfilename())
    print(p.resultfiles())

    print('do I only need to merge the results?  %s'%(p.mergeResultOnly(),))
    alreadyprocessedRuns=p.fieldvalues('run','int')
    print('runs already have results  %s'%(alreadyprocessedRuns,))
    print('total delivered  %s'%(p.fieldtotal('delivered','float'),))
    print('total recorded  %s'%(p.fieldtotal('recorded','float'),))

    # runsandls() returns None when no selection file was parsed
    selected=p.runsandls() or {}
    # set membership instead of scanning the run list for every selected run
    processed=set(alreadyprocessedRuns)
    newrunsandls={}
    for run,cmslslist in selected.items():
        if run not in processed:
            newrunsandls[run]=cmslslist
    print('runs and ls still need to be processed %s'%(newrunsandls,))

    # a single name without '+' is treated as a selection file only
    filename='../test/lumi_900_output.json'
    p2=inputFilesetParser(filename)
    print('result 2:  %s'%(p2.runs(),))