dataset.Dataset Class Reference

Public Member Functions

def __init__
 
def convertTimeToRun
 
def datasetSnippet
 
def dataType
 
def dump_cff
 
def fileInfoList
 
def fileList
 
def forcerunrange
 
def getForceRunRangeFunction
 
def magneticField
 
def magneticFieldForRun
 
def name
 
def parentDataset
 
def predefined
 
def runList
 

Private Member Functions

def __chunks
 
def __createSnippet
 
def __dateString
 
def __datetime
 
def __find_ge
 
def __find_lt
 
def __findInJson
 
def __getData
 
def __getDataType
 
def __getFileInfoList
 
def __getMagneticField
 
def __getMagneticFieldForRun
 
def __getParentDataset
 
def __getRunList
 

Private Attributes

 __alreadyStored
 
 __cmssw
 
 __cmsswrelease
 
 __dasLimit
 
 __dataType
 
 __fileInfoList
 
 __fileList
 
 __filename
 
 __firstusedrun
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __parentFileInfoList
 
 __parentFileList
 
 __predefined
 
 __runList
 

Static Private Attributes

tuple __dummy_source_template
 

Detailed Description

Definition at line 14 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] 
)

Definition at line 16 of file dataset.py.

16 
17  cmssw = os.environ["CMSSW_BASE"], cmsswrelease = os.environ["CMSSW_RELEASE_BASE"]):
18  self.__name = datasetName
19  self.__origName = datasetName
20  self.__dasLimit = dasLimit
21  self.__fileList = None
22  self.__fileInfoList = None
23  self.__runList = None
24  self.__alreadyStored = False
25  self.__cmssw = cmssw
26  self.__cmsswrelease = cmsswrelease
27  self.__firstusedrun = None
28  self.__lastusedrun = None
29  self.__parentDataset = None
30  self.__parentFileList = None
31  self.__parentFileInfoList = None
32 
33  # check if the dataset name matches the CMS dataset naming scheme
34  if re.match( r'/.+/.+/.+', self.__name ):
35  self.__official = True
36  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
37  else:
38  self.__official = False
39  fileName = self.__name + "_cff.py"
40 
41  searchPath1 = os.path.join( self.__cmssw, "python",
42  "Alignment", "OfflineValidation",
43  fileName )
44  searchPath2 = os.path.join( self.__cmssw, "src",
45  "Alignment", "OfflineValidation",
46  "python", fileName )
47  searchPath3 = os.path.join( self.__cmsswrelease,
48  "python", "Alignment",
49  "OfflineValidation", fileName )
50  if self.__official and not tryPredefinedFirst:
51  self.__predefined = False
52  elif os.path.exists( searchPath1 ):
53  self.__predefined = True
54  self.__filename = searchPath1
55  elif os.path.exists( searchPath2 ):
56  msg = ("The predefined dataset '%s' does exist in '%s', but "
57  "you need to run 'scram b' first."
58  %( self.__name, searchPath2 ))
59  if self.__official:
60  print msg
61  print "Getting the data from DAS again. To go faster next time, run scram b."
62  else:
63  raise AllInOneError( msg )
64  elif os.path.exists( searchPath3 ):
65  self.__predefined = True
66  self.__filename = searchPath3
67  elif self.__official:
68  self.__predefined = False
69  else:
70  msg = ("The predefined dataset '%s' does not exist. Please "
71  "create it first or check for typos."%( self.__name ))
72  raise AllInOneError( msg )
73 
74  if self.__predefined and self.__official:
75  self.__name = "Dataset" + self.__name.replace("/","_")
76 
77  self.__dataType = self.__getDataType()
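A minimal construction sketch (illustrative, not generated from dataset.py; the dataset name is a placeholder following the /primary/processed/tier scheme tested above):

from dataset import Dataset  # assumes dataset.py is importable

d = Dataset("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO")
# a name matching r'/.+/.+/.+' marks the dataset as official; the constructor
# then looks for a predefined Dataset*_cff.py in the three search paths
# before falling back to DAS queries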

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 79 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

79 
80  def __chunks( self, theList, n ):
81  """ Yield successive n-sized chunks from theList.
82  """
83  for i in xrange( 0, len( theList ), n ):
84  yield theList[i:i+n]
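A standalone sketch of the chunking logic (illustrative input; __createSnippet() calls this method with n = 255):

theList = ["a", "b", "c", "d", "e"]
chunks = [theList[i:i+2] for i in xrange(0, len(theList), 2)]
# chunks == [['a', 'b'], ['c', 'd'], ['e']]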
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 117 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__dummy_source_template, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__lastusedrun, dataset.Dataset.convertTimeToRun(), dataset.Dataset.fileList(), dataset.Dataset.getForceRunRangeFunction(), and dataset.Dataset.predefined().

Referenced by dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

118  crab = False, parent = False ):
119  if firstRun:
120  firstRun = int( firstRun )
121  if lastRun:
122  lastRun = int( lastRun )
123  if ( begin and firstRun ) or ( end and lastRun ):
124  msg = ( "The Usage of "
125  + "'begin' & 'firstRun' " * int( bool( begin and
126  firstRun ) )
127  + "and " * int( bool( ( begin and firstRun ) and
128  ( end and lastRun ) ) )
129  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
130  + "is ambigous." )
131  raise AllInOneError( msg )
132  if begin or end:
133  ( firstRun, lastRun ) = self.convertTimeToRun(
134  begin = begin, end = end, firstRun = firstRun,
135  lastRun = lastRun )
136  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
137  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
138  "chosen is greater than the upper time/runrange limit "
139  "('end'/'lastRun').")
140  raise AllInOneError( msg )
141  if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
142  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
143  "only work for official datasets, not predefined _cff.py files" )
144  raise AllInOneError( msg )
145  goodLumiSecStr = ""
146  lumiStr = ""
147  lumiSecExtend = ""
148  if firstRun or lastRun or jsonPath:
149  goodLumiSecStr = ( "lumiSecs = cms.untracked."
150  "VLuminosityBlockRange()\n" )
151  lumiStr = " lumisToProcess = lumiSecs,\n"
152  if not jsonPath:
153  selectedRunList = self.__getRunList()
154  if firstRun:
155  selectedRunList = [ run for run in selectedRunList \
156  if self.__findInJson(run, "run_number") >= firstRun ]
157  if lastRun:
158  selectedRunList = [ run for run in selectedRunList \
159  if self.__findInJson(run, "run_number") <= lastRun ]
160  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
161  + str( self.__findInJson(run, "run_number") ) + ":max" \
162  for run in selectedRunList ]
163  splitLumiList = list( self.__chunks( lumiList, 255 ) )
164  else:
165  theLumiList = None
166  try:
167  theLumiList = LumiList ( filename = jsonPath )
168  except ValueError:
169  pass
170 
171  if theLumiList is not None:
172  allRuns = theLumiList.getRuns()
173  runsToRemove = []
174  for run in allRuns:
175  if firstRun and int( run ) < firstRun:
176  runsToRemove.append( run )
177  if lastRun and int( run ) > lastRun:
178  runsToRemove.append( run )
179  theLumiList.removeRuns( runsToRemove )
180  splitLumiList = list( self.__chunks(
181  theLumiList.getCMSSWString().split(','), 255 ) )
182  else:
183  with open(jsonPath) as f:
184  jsoncontents = f.read()
185  if "process.source.lumisToProcess" in jsoncontents:
186  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
187  if firstRun or lastRun:
188  msg += ("\n (after applying firstRun and/or lastRun)")
189  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
190  msg += "\nCheck your config file to make sure that it worked properly."
191  print msg
192 
193  runlist = self.__getRunList()
194  if firstRun or lastRun:
195  self.__firstusedrun = -1
196  self.__lastusedrun = -1
197  jsoncontents = re.sub("\d+:(\d+|max)-\d+:(\d+|max)", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
198  self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
199  self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
200  else:
201  self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
202  self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
203  lumiSecExtend = jsoncontents
204  splitLumiList = [[""]]
205 
206  if splitLumiList and splitLumiList[0]:
207  if splitLumiList[0][0]:
208  lumiSecStr = [ "',\n'".join( lumis ) \
209  for lumis in splitLumiList ]
210  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
211  for lumis in lumiSecStr ]
212  lumiSecExtend = "\n".join( lumiSecStr )
213  runlist = self.__getRunList()
214  self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
215  self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
216  else:
217  msg = "You are trying to run a validation without any runs! Check that:"
218  if firstRun or lastRun:
219  msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
220  if jsonPath:
221  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
222  if (firstRun or lastRun) and jsonPath:
223  msg += "\n - firstRun and lastRun are consistent with your JSON file"
224  if begin:
225  msg = msg.replace("firstRun", "begin")
226  if end:
227  msg = msg.replace("lastRun", "end")
228  raise AllInOneError(msg)
229 
230  else:
231  runlist = self.__getRunList()
232  self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
233  self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
234 
235  if crab:
236  files = ""
237  else:
238  splitFileList = list( self.__chunks( self.fileList(), 255 ) )
239  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
240  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
241  for files in fileStr ]
242  files = "\n".join( fileStr )
243 
244  if parent:
245  splitParentFileList = list( self.__chunks( self.fileList(parent = True), 255 ) )
246  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
247  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
248  for parentFiles in parentFileStr ]
249  parentFiles = "\n".join( parentFileStr )
250  files += "\n\n" + parentFiles
251 
252 
253  theMap = repMap
254  theMap["files"] = files
255  theMap["json"] = jsonPath
256  theMap["lumiStr"] = lumiStr
257  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
258  theMap["lumiSecExtend"] = lumiSecExtend
259  if crab:
260  dataset_snippet = self.__dummy_source_template%( theMap )
261  else:
262  dataset_snippet = self.__source_template%( theMap )
263  return dataset_snippet
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 613 of file dataset.py.


Referenced by dataset.Dataset.convertTimeToRun().

614  def __dateString(self, date):
615  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 604 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

605  def __datetime(self, stringForDas):
606  if len(stringForDas) != 8:
607  raise AllInOneError(stringForDas + " is not a valid date string.\n"
608  + "DAS accepts dates in the form 'yyyymmdd'")
609  year = stringForDas[:4]
610  month = stringForDas[4:6]
611  day = stringForDas[6:8]
612  return datetime.date(int(year), int(month), int(day))
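A standalone round trip through __datetime() and __dateString() (illustrative date):

import datetime

stringForDas = "20150630"  # DAS expects dates as 'yyyymmdd'
date = datetime.date(int(stringForDas[:4]), int(stringForDas[4:6]), int(stringForDas[6:8]))
assert str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2) == stringForDas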
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 271 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

272  def __find_ge( self, a, x):
273  'Find leftmost item greater than or equal to x'
274  i = bisect.bisect_left( a, x )
275  if i != len( a ):
276  return i
277  raise ValueError
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 264 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

265  def __find_lt( self, a, x ):
266  'Find rightmost value less than x'
267  i = bisect.bisect_left( a, x )
268  if i:
269  return i-1
270  raise ValueError
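A standalone sketch of the two bisect helpers on a sorted run list (run numbers illustrative):

import bisect

runList = [190456, 190645, 190702, 191043]
i = bisect.bisect_left(runList, 190700)      # __find_ge: leftmost item >= x
assert runList[i] == 190702
j = bisect.bisect_left(runList, 190700) - 1  # __find_lt: rightmost value < x
assert runList[j] == 190645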
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 278 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

279  def __findInJson(self, jsondict, strings):
280  if isinstance(strings, str):
281  strings = [ strings ]
282 
283  if len(strings) == 0:
284  return jsondict
285  if isinstance(jsondict,dict):
286  if strings[0] in jsondict:
287  try:
288  return self.__findInJson(jsondict[strings[0]], strings[1:])
289  except KeyError:
290  pass
291  else:
292  for a in jsondict:
293  if strings[0] in a:
294  try:
295  return self.__findInJson(a[strings[0]], strings[1:])
296  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
297  pass
298  #if it's not found
299  raise KeyError("Can't find " + strings[0])
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 326 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

327  def __getData( self, dasQuery, dasLimit = 0 ):
328  dasData = das_client.get_data( 'https://cmsweb.cern.ch',
329  dasQuery, 0, dasLimit, False )
330  if isinstance(dasData, str):
331  jsondict = json.loads( dasData )
332  else:
333  jsondict = dasData
334  # Check if the DAS query failed
335  try:
336  error = self.__findInJson(jsondict,["data","error"])
337  except KeyError:
338  error = None
339  if error or self.__findInJson(jsondict,"status") != 'ok' or "data" not in jsondict:
340  jsonstr = str(jsondict)
341  if len(jsonstr) > 10000:
342  jsonfile = "das_query_output_%i.txt"
343  i = 0
344  while os.path.lexists(jsonfile % i):
345  i += 1
346  jsonfile = jsonfile % i
347  theFile = open( jsonfile, "w" )
348  theFile.write( jsonstr )
349  theFile.close()
350  msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
351  else:
352  msg = "The DAS query returned a error. Here is the output\n" + jsonstr
353  msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
354  raise AllInOneError(msg)
355  return self.__findInJson(jsondict,"data")
def dataset.Dataset.__getDataType (   self)
private

Definition at line 356 of file dataset.py.

References dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, and dataset.Dataset.name().

Referenced by dataset.Dataset.dataType().

357  def __getDataType( self ):
358  if self.__predefined:
359  with open(self.__filename) as f:
360  datatype = None
361  for line in f.readlines():
362  if line.startswith("#data type: "):
363  if datatype is not None:
364  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
365  datatype = line.replace("#data type: ", "").replace("\n","")
366  return datatype
367  return "unknown"
368 
369  dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
370  'dataset.name'%( self.__name ) )
371  data = self.__getData( dasQuery_type )
372 
373  try:
374  return self.__findInJson(data, ["dataset", "datatype"])
375  except KeyError:
376  print ("Cannot find the datatype of the dataset '%s'\n"
377  "It may not be possible to automatically find the magnetic field,\n"
378  "and you will not be able run in CRAB mode"
379  %( self.name() ))
380  return "unknown"
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 525 of file dataset.py.

References dataset.Dataset.__fileInfoList, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__parentFileInfoList, dataset.Dataset.__predefined, dataset.Dataset.name(), and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

526  def __getFileInfoList( self, dasLimit, parent = False ):
527  if self.__predefined:
528  if parent:
529  extendstring = "secFiles.extend"
530  else:
531  extendstring = "readFiles.extend"
532  with open(self.__filename) as f:
533  files = []
534  copy = False
535  for line in f.readlines():
536  if "]" in line:
537  copy = False
538  if copy:
539  files.append({"name": line.translate(None, "', " + '"')})
540  if extendstring in line and "[" in line and "]" not in line:
541  copy = True
542  return files
543 
544  if self.__fileInfoList and not parent:
545  return self.__fileInfoList
546  if self.__parentFileInfoList and parent:
547  return self.__parentFileInfoList
548 
549  if parent:
550  searchdataset = self.parentDataset()
551  else:
552  searchdataset = self.__name
553  dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
554  'file.creation_time, '
555  'file.modification_time'%( searchdataset ) )
556  print "Requesting file information for '%s' from DAS..."%( searchdataset ),
557  data = self.__getData( dasQuery_files, dasLimit )
558  print "Done."
559  data = [ self.__findInJson(entry,"file") for entry in data ]
560  if len( data ) == 0:
561  msg = ("No files are available for the dataset '%s'. This can be "
562  "due to a typo or due to a DAS problem. Please check the "
563  "spelling of the dataset and/or retry to run "
564  "'validateAlignments.py'."%( self.name() ))
565  raise AllInOneError( msg )
566  fileInformationList = []
567  for file in data:
568  fileName = 'unknown'
569  try:
570  fileName = self.__findInJson(file, "name")
571  fileCreationTime = self.__findInJson(file, "creation_time")
572  fileNEvents = self.__findInJson(file, "nevents")
573  except KeyError:
574  print ("DAS query gives bad output for file '%s'. Skipping it.\n"
575  "It may work if you try again later.") % fileName
576  fileNEvents = 0
577  # select only non-empty files
578  if fileNEvents == 0:
579  continue
580  fileDict = { "name": fileName,
581  "creation_time": fileCreationTime,
582  "nevents": fileNEvents
583  }
584  fileInformationList.append( fileDict )
585  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
586  if parent:
587  self.__parentFileInfoList = fileInformationList
588  else:
589  self.__fileInfoList = fileInformationList
590  return fileInformationList
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 391 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.magneticField().

392  def __getMagneticField( self ):
393  Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
394  if not os.path.isdir(Bfieldlocation):
395  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
396  Bfieldlist = [ f.replace("_cff.py",'') \
397  for f in os.listdir(Bfieldlocation) \
398  if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
399  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
400 
401  if self.__predefined:
402  with open(self.__filename) as f:
403  datatype = None
404  Bfield = None
405  for line in f.readlines():
406  if line.startswith("#data type: "):
407  if datatype is not None:
408  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
409  datatype = line.replace("#data type: ", "").replace("\n","")
410  datatype = datatype.split("#")[0].strip()
411  if line.startswith("#magnetic field: "):
412  if Bfield is not None:
413  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
414  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
415  Bfield = Bfield.split("#")[0].strip()
416  if Bfield is not None:
417  Bfield = Bfield.split(",")[0]
418  if Bfield in Bfieldlist or Bfield == "unknown":
419  return Bfield
420  #===========================================================================
421  #For compatibility with already written datasets - remove this at some point
422  #(until the next === line)
423  #It's currently June 2015, anytime starting in 2016 is more than safe
424  elif Bfield == "AutoFromDBCurrent":
425  return "MagneticField"
426  elif "MagneticField_" + Bfield in Bfieldlist:
427  return "MagneticField_" + Bfield
428  #===========================================================================
429  else:
430  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
431  print "Using Bfield='unknown' - this will revert to the default"
432  return "unknown"
433  elif datatype == "data":
434  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
435  else:
436  return "unknown"
437 
438  if self.__dataType == "data":
439  return "MagneticField"
440 
441  dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) ) #try to find the magnetic field from DAS
442  data = self.__getData( dasQuery_B ) #it seems to be there for the newer (7X) MC samples, except cosmics
443 
444  try:
445  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
446  if Bfield in Bfieldlist:
447  return Bfield
448  elif Bfield == "38T" or Bfield == "38T_PostLS1":
449  return "MagneticField"
450  elif "MagneticField_" + Bfield in Bfieldlist:
451  return "MagneticField_" + Bfield
452  elif Bfield == "":
453  pass
454  else:
455  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
456  print "Using Bfield='unknown' - this will revert to the default magnetic field"
457  return "unknown"
458  except KeyError:
459  pass
460 
461  for possibleB in Bfieldlist:
462  if (possibleB != "MagneticField"
463  and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
464  #final attempt - try to identify the dataset from the name
465  #all cosmics dataset names contain "TkAlCosmics0T"
466  if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
467  return "MagneticField"
468  return possibleB
469 
470  return "unknown"
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 471 of file dataset.py.

References dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

472  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
473  """For MC, this returns the same as the previous function.
474  For data, it gets the magnetic field from the runs. This is important for
475  deciding which template to use for offlinevalidation
476  """
477  if self.__dataType == "mc" and self.__magneticField == "MagneticField":
478  return 3.8 #For 3.8T MC the default MagneticField is used
479  if "T" in self.__magneticField:
480  Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
481  try:
482  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
483  except ValueError:
484  pass
485  if self.__predefined:
486  with open(self.__filename) as f:
487  Bfield = None
488  for line in f.readlines():
489  if line.startswith("#magnetic field: ") and "," in line:
490  if Bfield is not None:
491  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
492  return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
493 
494  if run > 0:
495  dasQuery = ('run = %s'%run) #for data
496  data = self.__getData(dasQuery)
497  try:
498  return self.__findInJson(data, ["run","bfield"])
499  except KeyError:
500  return "unknown Can't get the magnetic field for run %s from DAS" % run
501 
502  #run < 0 - find B field for the first and last runs, and make sure they're compatible
503  # (to within tolerance)
504  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
505  if self.__firstusedrun is None or self.__lastusedrun is None:
506  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
507  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
508  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
509  try:
510  if abs(firstrunB - lastrunB) <= tolerance:
511  return .5*(firstrunB + lastrunB)
512  print firstrunB, lastrunB, tolerance
513  return ("unknown The beginning and end of your run range for %s\n"
514  "have different magnetic fields (%s, %s)!\n"
515  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
516  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
517  except TypeError:
518  try:
519  if "unknown" in firstrunB:
520  return firstrunB
521  else:
522  return lastrunB
523  except TypeError:
524  return lastrunB
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 381 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.parentDataset().

382  def __getParentDataset( self ):
383  dasQuery = "parent dataset=" + self.__name
384  data = self.__getData( dasQuery )
385  try:
386  return self.__findInJson(data, ["parent", "name"])
387  except KeyError:
388  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
389  "Here is the DAS output:\n" + str(jsondict) +
390  "\nIt's possible that this was a server error. If so, it may work if you try again later")
def dataset.Dataset.__getRunList (   self)
private

Definition at line 591 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__runList.

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

592  def __getRunList( self ):
593  if self.__runList:
594  return self.__runList
595  dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
596  'run.creation_time'%( self.__name ) )
597  print "Requesting run information for '%s' from DAS..."%( self.__name ),
598  data = self.__getData( dasQuery_runs )
599  print "Done."
600  data = [ self.__findInJson(entry,"run") for entry in data ]
601  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
602  self.__runList = data
603  return data
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 618 of file dataset.py.

References dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__createSnippet().

619  shortTuple = True ):
620  if ( begin and firstRun ) or ( end and lastRun ):
621  msg = ( "The Usage of "
622  + "'begin' & 'firstRun' " * int( bool( begin and
623  firstRun ) )
624  + "and " * int( bool( ( begin and firstRun ) and
625  ( end and lastRun ) ) )
626  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
627  + "is ambigous." )
628  raise AllInOneError( msg )
629 
630  if begin or end:
631  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
632 
633  if begin:
634  lastdate = begin
635  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
636  firstdate = lastdate
637  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
638  dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
639  begindata = self.__getData(dasQuery_begin)
640  if len(begindata) > 0:
641  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
642  try:
643  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
644  except ValueError:
645  msg = ( "Your 'begin' is after the creation time of the last "
646  "run in the dataset\n'%s'"%( self.__name ) )
647  raise AllInOneError( msg )
648  firstRun = runList[runIndex]
649  begin = None
650  break
651 
652  if begin:
653  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
654  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
655 
656  if end:
657  firstdate = end
658  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
659  lastdate = firstdate
660  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
661  dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
662  enddata = self.__getData(dasQuery_end)
663  if len(enddata) > 0:
664  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
665  try:
666  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
667  except ValueError:
668  msg = ( "Your 'end' is before the creation time of the first "
669  "run in the dataset\n'%s'"%( self.__name ) )
670  raise AllInOneError( msg )
671  lastRun = runList[runIndex]
672  end = None
673  break
674 
675  if end:
676  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
677  "Try using an 'end' that has runs soon before it (within 2 months at most)")
678 
679  if shortTuple:
680  return firstRun, lastRun
681  else:
682  return begin, end, firstRun, lastRun
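An illustrative call, with d a Dataset instance as in the construction sketch near the top (dates are placeholders in the 'yyyymmdd' form that __datetime() expects):

firstRun, lastRun = d.convertTimeToRun(begin = "20120401", end = "20120430")
# DAS is queried for runs created within a growing window after 'begin' and
# before 'end'; the results are mapped onto the dataset's own run list
# using __find_ge() / __find_lt()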
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 702 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().


703  firstRun = None, lastRun = None, crab = False, parent = False ):
704  if self.__predefined and parent:
705  with open(self.__filename) as f:
706  if "secFiles.extend" not in f.read():
707  msg = ("The predefined dataset '%s' does not contain secondary files, "
708  "which your validation requires!") % self.__name
709  if self.__official:
710  self.__name = self.__origName
711  self.__predefined = False
712  print msg
713  print ("Retreiving the files from DAS. You will be asked if you want "
714  "to overwrite the old dataset.\n"
715  "It will still be compatible with validations that don't need secondary files.")
716  else:
717  raise AllInOneError(msg)
718 
719  if self.__predefined:
720  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
721  "process.maxEvents = cms.untracked.PSet(\n"
722  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
723  ")\n"
724  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
725  %(self.__name))
726  if not parent:
727  with open(self.__filename) as f:
728  if "secFiles.extend" in f.read():
729  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
730  return snippet
731  theMap = { "process": "process.",
732  "tab": " " * len( "process." ),
733  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
734  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
735  "importCms": "",
736  "header": ""
737  }
738  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
739  begin = begin,
740  end = end,
741  firstRun = firstRun,
742  lastRun = lastRun,
743  repMap = theMap,
744  crab = crab,
745  parent = parent )
746  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
747  try:
748  self.dump_cff(parent = parent)
749  except AllInOneError, e:
750  print "Can't store the dataset as a cff:"
751  print e
752  print "This may be inconvenient in the future, but will not cause a problem for this validation."
753  return datasetSnippet
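An illustrative call (run numbers are placeholders; the .oO[...]Oo. tokens in the returned snippet are repMap placeholders substituted later by the validation machinery):

snippet = d.datasetSnippet(firstRun = 190456, lastRun = 191043)
# for a predefined dataset this instead returns a process.load(...) stanza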
def dataset.Dataset.dataType (   self)

Definition at line 683 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

684  def dataType( self ):
685  if not self.__dataType:
686  self.__dataType = self.__getDataType()
687  return self.__dataType
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 755 of file dataset.py.

References dataset.Dataset.__alreadyStored, dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, and dataset.Dataset.__name.

Referenced by dataset.Dataset.datasetSnippet().

756  end = None, firstRun = None, lastRun = None, parent = False ):
757  if self.__alreadyStored:
758  return
759  self.__alreadyStored = True
760  if outName == None:
761  outName = "Dataset" + self.__name.replace("/", "_")
762  packageName = os.path.join( "Alignment", "OfflineValidation" )
763  if not os.path.exists( os.path.join(
764  self.__cmssw, "src", packageName ) ):
765  msg = ("You try to store the predefined dataset'%s'.\n"
766  "For that you need to check out the package '%s' to your "
767  "private relase area in\n"%( outName, packageName )
768  + self.__cmssw )
769  raise AllInOneError( msg )
770  theMap = { "process": "",
771  "tab": "",
772  "nEvents": str( -1 ),
773  "skipEventsString": "",
774  "importCms": "import FWCore.ParameterSet.Config as cms\n",
775  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
776  "#%(name)s\n"
777  "#data type: %(dataType)s\n"
778  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
779  %{"name": self.__name, #need to create the snippet before getting the magnetic field
780  "dataType": self.__dataType} #so that we know the first and last runs
781  }
782  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
783  begin = begin,
784  end = end,
785  firstRun = firstRun,
786  lastRun = lastRun,
787  repMap = theMap,
788  parent = parent)
789  magneticField = self.__magneticField
790  if magneticField == "MagneticField":
791  magneticField = "%s, %s #%s" % (magneticField,
792  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
793  "Use MagneticField_cff.py; the number is for determining which track selection to use."
794  )
795  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
796  filePath = os.path.join( self.__cmssw, "src", packageName,
797  "python", outName + "_cff.py" )
798  if os.path.exists( filePath ):
799  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
800  askString = "Do you want to overwrite it? [y/n]\n"
801  inputQuery = existMsg + askString
802  while True:
803  userInput = raw_input( inputQuery ).lower()
804  if userInput == "y":
805  break
806  elif userInput == "n":
807  return
808  else:
809  inputQuery = askString
810  print ( "The predefined dataset '%s' will be stored in the file\n"
811  %( outName )
812  + filePath +
813  "\nFor future use you have to do 'scram b'." )
814  print
815  theFile = open( filePath, "w" )
816  theFile.write( dataset_cff )
817  theFile.close()
818  return
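An illustrative call (the JSON path is a placeholder):

d.dump_cff(jsonPath = "Cert_2012_JSON.txt", firstRun = 190456, lastRun = 191043)
# writes Dataset<name>_cff.py under Alignment/OfflineValidation/python in the
# user's release area and asks for confirmation before overwriting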
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 834 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

835  def fileInfoList( self, parent = False ):
836  return self.__getFileInfoList( self.__dasLimit, parent )
def dataset.Dataset.fileList (   self,
  parent = False 
)

Definition at line 819 of file dataset.py.

References dataset.Dataset.__fileList, dataset.Dataset.__findInJson(), dataset.Dataset.__parentFileList, and dataset.Dataset.fileInfoList().

Referenced by dataset.Dataset.__createSnippet().

820  def fileList( self, parent = False ):
821  if self.__fileList and not parent:
822  return self.__fileList
823  if self.__parentFileList and parent:
824  return self.__parentFileList
825 
826  fileList = [ self.__findInJson(fileInfo,"name") \
827  for fileInfo in self.fileInfoList(parent) ]
828 
829  if not parent:
830  self.__fileList = fileList
831  else:
832  self.__parentFileList = fileList
833  return fileList
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 300 of file dataset.py.

References dataset.Dataset.__firstusedrun and dataset.Dataset.__lastusedrun.

Referenced by dataset.Dataset.getForceRunRangeFunction().

301  def forcerunrange(self, firstRun, lastRun, s):
302  """s must be in the format run1:lum1-run2:lum2"""
303  s = s.group()
304  run1 = s.split("-")[0].split(":")[0]
305  lum1 = s.split("-")[0].split(":")[1]
306  run2 = s.split("-")[1].split(":")[0]
307  lum2 = s.split("-")[1].split(":")[1]
308  if int(run2) < firstRun or int(run1) > lastRun:
309  return ""
310  if int(run1) < firstRun or firstRun < 0:
311  run1 = firstRun
312  lum1 = 1
313  if int(run2) > lastRun:
314  run2 = lastRun
315  lum2 = "max"
316  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
317  self.__firstusedrun = int(run1)
318  if int(run2) > self.__lastusedrun:
319  self.__lastusedrun = int(run2)
320  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 321 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__createSnippet().

322  def getForceRunRangeFunction(self, firstRun, lastRun):
323  def forcerunrangefunction(s):
324  return self.forcerunrange(firstRun, lastRun, s)
325  return forcerunrangefunction
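The returned closure is intended as a re.sub() callback, as used in __createSnippet(). A standalone sketch (run numbers illustrative, d a Dataset instance):

import re

jsoncontents = "'190456:1-190500:max','190600:1-191100:max'"
clipped = re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)",
                 d.getForceRunRangeFunction(190480, 190700),
                 jsoncontents)
# each run1:lum1-run2:lum2 range is clipped to [190480, 190700];
# ranges lying entirely outside are replaced by the empty string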
def dataset.Dataset.magneticField (   self)

Definition at line 688 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

689  def magneticField( self ):
690  if not self.__magneticField:
691  self.__magneticField = self.__getMagneticField()
692  return self.__magneticField
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 693 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

694  def magneticFieldForRun( self, run = -1 ):
695  return self.__getMagneticFieldForRun(run)
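An illustrative call (run number is a placeholder):

print d.magneticFieldForRun(190456)  # e.g. 3.8 for a 3.8 T data run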
def dataset.Dataset.name (   self)

Definition at line 837 of file dataset.py.

References dataset.Dataset.__name.

Referenced by dataset.Dataset.__getDataType() and dataset.Dataset.__getFileInfoList().

838  def name( self ):
839  return self.__name
def dataset.Dataset.parentDataset (   self)

Definition at line 696 of file dataset.py.

References dataset.Dataset.__getParentDataset() and dataset.Dataset.__parentDataset.

Referenced by dataset.Dataset.__getFileInfoList().

697  def parentDataset( self ):
698  if not self.__parentDataset:
699  self.__parentDataset = self.__getParentDataset()
700  return self.__parentDataset
def dataset.Dataset.predefined (   self)

Definition at line 840 of file dataset.py.

References dataset.Dataset.__predefined.

Referenced by dataset.Dataset.__createSnippet().

841  def predefined( self ):
842  return self.__predefined
def dataset.Dataset.runList (   self)

Definition at line 843 of file dataset.py.

References dataset.Dataset.__getRunList(), and dataset.Dataset.__runList.

844  def runList( self ):
845  if self.__runList:
846  return self.__runList
847  return self.__getRunList()
848 

Member Data Documentation

dataset.Dataset.__alreadyStored
private

Definition at line 23 of file dataset.py.

Referenced by dataset.Dataset.dump_cff().

dataset.Dataset.__cmssw
private

Definition at line 24 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasLimit
private

Definition at line 19 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private

Definition at line 76 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dataType(), and dataset.Dataset.dump_cff().

tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 103 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
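Despite the "tuple" label above, the parenthesized literal is a single implicitly concatenated string. A sketch of how it is filled by %-substitution with the repMap keys it contains (values illustrative; the name-mangled attribute access is for illustration only):

theMap = {"process": "process.",
          "tab": " " * len("process."),
          "nEvents": "100",
          "skipEventsString": ""}
print Dataset._Dataset__dummy_source_template % theMap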

dataset.Dataset.__fileInfoList
private

Definition at line 21 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__fileList
private

Definition at line 20 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__filename
private

Definition at line 53 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), and dataset.Dataset.datasetSnippet().

dataset.Dataset.__firstusedrun
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.__getMagneticFieldForRun(), and dataset.Dataset.forcerunrange().

dataset.Dataset.__lastusedrun
private

Definition at line 27 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.__getMagneticFieldForRun(), and dataset.Dataset.forcerunrange().

dataset.Dataset.__magneticField
private

Definition at line 77 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticField().

dataset.Dataset.__name
private

Definition at line 17 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), dataset.Dataset.datasetSnippet(), dataset.Dataset.dump_cff(), and dataset.Dataset.name().

dataset.Dataset.__official
private

Definition at line 34 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 18 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__parentFileInfoList
private

Definition at line 30 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__parentFileList
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__predefined
private

Definition at line 50 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.predefined().

dataset.Dataset.__runList
private

Definition at line 22 of file dataset.py.

Referenced by dataset.Dataset.__getRunList(), and dataset.Dataset.runList().