CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
genericValidation.py
Go to the documentation of this file.
1 import os
2 import re
3 import json
4 import globalDictionaries
5 import configTemplates
6 from dataset import Dataset
7 from helperFunctions import replaceByMap, addIndex, getCommandOutput2
8 from plottingOptions import PlottingOptions
9 from TkAlExceptions import AllInOneError
10 
11 
13  defaultReferenceName = "DEFAULT"
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[], addneedpackages=[]):
    """
    Set up the state that is common to all validations.

    Arguments:
    - `valName`: String which identifies individual validation instances;
                 the config section read is "<valType>:<valName>"
    - `alignment`: alignment object to validate, stored as
                   `self.alignmentToValidate`
    - `config`: configuration object; this method calls its `getGeneral`,
                `getResultingSection`, `has_option`, `get` and `checkInput`
    - `valType`: String which specifies the type of validation
    - `addDefaults`: Dictionary with default values in addition to the
                     general default values
    - `addMandatories`: List with mandatory parameters in addition to the
                        general mandatory parameters
    - `addneedpackages`: List of packages to locate in addition to
                         "Alignment/OfflineValidation"

    The mutable default arguments are only read, never modified.

    Raises `AllInOneError` if more than 40 parallel jobs are requested, if
    the given jobid is not valid, if the cmssw path contains a backslash or
    a single quote or cannot be listed, if the environment of a foreign
    release cannot be determined, if a needed package is not found, or if
    the `AutoAlternates` option cannot be parsed.
    """
    import random
    self.name = valName
    self.valType = valType
    self.alignmentToValidate = alignment
    self.general = config.getGeneral()
    # randint needs integer bounds; the float literal 10e9 used previously
    # is rejected by recent Python versions. 10**10 is the same value.
    self.randomWorkdirPart = "%0i"%random.randint(1, 10**10)
    self.configFiles = []
    self.filesToCompare = {}
    self.config = config
    self.jobid = ""

    section = valType+":"+self.name

    defaults = {
        "jobmode": self.general["jobmode"],
        "cmssw": os.environ['CMSSW_BASE'],
        "parallelJobs": "1",
        "jobid": "",
        }
    defaults.update(addDefaults)
    mandatories = []
    mandatories += addMandatories
    needpackages = ["Alignment/OfflineValidation"]
    needpackages += addneedpackages
    theUpdate = config.getResultingSection(section,
                                           defaultDict = defaults,
                                           demandPars = mandatories)
    self.general.update(theUpdate)
    self.jobmode = self.general["jobmode"]
    self.NJobs = int(self.general["parallelJobs"])

    # limit maximum number of parallel jobs to 40
    # (each output file is approximately 20MB)
    maximumNumberJobs = 40
    if self.NJobs > maximumNumberJobs:
        msg = ("Maximum allowed number of parallel jobs "
               +str(maximumNumberJobs)+" exceeded!!!")
        raise AllInOneError(msg)

    self.jobid = self.general["jobid"]
    if self.jobid:
        try:  #make sure it's actually a valid jobid
            output = getCommandOutput2("bjobs %(jobid)s 2>&1"%self.general)
            if "is not found" in output: raise RuntimeError
        except RuntimeError:
            raise AllInOneError("%s is not a valid jobid.\nMaybe it finished already?"%self.jobid)

    self.cmssw = self.general["cmssw"]
    # The path is embedded in single quotes in a shell command below, so a
    # backslash or a single quote in it is refused.
    badcharacters = r"\'"
    for character in badcharacters:
        if character in self.cmssw:
            raise AllInOneError("The bad characters " + badcharacters + " are not allowed in the cmssw\n"
                                "path name.  If you really have it in such a ridiculously named location,\n"
                                "try making a symbolic link somewhere with a decent name.")
    try:
        os.listdir(self.cmssw)
    except OSError:
        raise AllInOneError("Your cmssw release " + self.cmssw + ' does not exist')

    if self.cmssw == os.environ["CMSSW_BASE"]:
        self.scramarch = os.environ["SCRAM_ARCH"]
        self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
    else:
        # Ask scram for the environment of the other release; the echo
        # prints the three values on separate lines.
        command = ("cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
                   ' && echo "$CMSSW_BASE\n$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
        commandoutput = getCommandOutput2(command).split('\n')
        if len(commandoutput) < 3:
            # Report a readable error instead of a bare IndexError.
            raise AllInOneError("Could not determine the CMSSW environment of "
                                + self.cmssw)
        self.cmssw = commandoutput[0]
        self.scramarch = commandoutput[1]
        self.cmsswreleasebase = commandoutput[2]

    # Locate every needed package, preferring the working area over the
    # release area.
    self.packages = {}
    for package in needpackages:
        for placetolook in self.cmssw, self.cmsswreleasebase:
            pkgpath = os.path.join(placetolook, "src", package)
            if os.path.exists(pkgpath):
                self.packages[package] = pkgpath
                break
        else:
            raise AllInOneError("Package {} does not exist in {} or {}!".format(package, self.cmssw, self.cmsswreleasebase))

    self.AutoAlternates = True
    if config.has_option("alternateTemplates","AutoAlternates"):
        try:
            # the lower-cased option value is parsed as JSON
            self.AutoAlternates = json.loads(config.get("alternateTemplates","AutoAlternates").lower())
        except ValueError:
            raise AllInOneError("AutoAlternates needs to be true or false, not %s" % config.get("alternateTemplates","AutoAlternates"))

    # list(...) keeps the concatenation valid where keys() is not a list
    knownOpts = list(defaults.keys())+mandatories
    ignoreOpts = []
    config.checkInput(section,
                      knownSimpleOptions = knownOpts,
                      ignoreOptions = ignoreOpts)
106 
def getRepMap(self, alignment = None):
    """
    Build the replacement map for this validation.

    Arguments:
    - `alignment`: alignment whose replacement map, name and conditions are
                   used; defaults to `self.alignmentToValidate`

    The map starts from the plotting options (or from an empty dictionary
    if creating them raises a `KeyError`). It is then updated, in this
    order, with the alignment's own map, `self.general`, the entries set
    explicitly below and the located package paths. Later updates override
    earlier ones.
    """
    if alignment is None:
        alignment = self.alignmentToValidate
    try:
        result = PlottingOptions(self.config, self.valType)
    except KeyError:
        result = {}
    result.update(alignment.getRepMap())
    result.update( self.general )
    result.update({
        # each validation instance works in its own random subdirectory
        "workdir": os.path.join(self.general["workdir"],
                                self.randomWorkdirPart),
        "datadir": self.general["datadir"],
        "logdir": self.general["logdir"],
        "CommandLineTemplate": ("#run configfile and post-proccess it\n"
                                "cmsRun %(cfgFile)s\n"
                                "%(postProcess)s "),
        "CMSSW_BASE": self.cmssw,
        "SCRAM_ARCH": self.scramarch,
        "CMSSW_RELEASE_BASE": self.cmsswreleasebase,
        "alignmentName": alignment.name,
        "condLoad": alignment.getConditions(),
        "LoadGlobalTagTemplate": configTemplates.loadGlobalTagTemplate,
        })
    result.update(self.packages)
    return result
133 
def getCompareStrings( self, requestId = None, plain = False ):
    """
    Return the strings that describe the files to compare.

    Arguments:
    - `requestId`: if None, a dictionary with one entry per key of
                   `self.filesToCompare` is returned; otherwise only the
                   entry named by the part after the last "." is returned
                   (the default reference name is used if `requestId`
                   contains no ".")
    - `plain`: if True the entries are just the file locations, otherwise
               they have the form "file=title|color|style", filled from the
               replacement map of the alignment

    Files under "/castor/" get an "rfio:" prefix, files under "/store/" get
    the "root://eoscms.cern.ch//eos/cms" prefix.

    Raises `AllInOneError` if the requested entry does not exist.
    """
    result = {}
    repMap = self.alignmentToValidate.getRepMap()
    for validationId in self.filesToCompare:
        fileName = self.filesToCompare[ validationId ]
        if fileName.startswith( "/castor/" ):
            fileName = "rfio:%s"%fileName
        elif fileName.startswith( "/store/" ):
            fileName = "root://eoscms.cern.ch//eos/cms%s"%fileName
        if plain:
            result[validationId] = fileName
        else:
            # format from a copy so the alignment's map is left untouched
            result[validationId] = ("%(file)s=%(title)s|%(color)s|%(style)s"
                                    %dict(repMap, file = fileName))
    if requestId is None:
        return result

    if "." not in requestId:
        requestId += ".%s"%GenericValidation.defaultReferenceName
    # only the part after the last dot selects the entry
    key = requestId.split(".")[-1]
    if key not in result:
        msg = ("could not find %s in reference Objects!"
               %key)
        raise AllInOneError(msg)
    return result[ key ]
157 
def createFiles(self, fileContents, path, repMap = None, repMaps = None):
    """
    Write the given file contents below `path` and return the file paths.

    Arguments:
    - `fileContents`: dictionary mapping file names to their contents
    - `path`: directory the file names are joined to
    - `repMap`: single map for all files
    - `repMaps`: a dict, with the filenames as the keys

    Each file name is expanded with `addIndex(filePath, self.NJobs)` and
    one file is written per expanded name. If a replacement map applies,
    its "nIndex" entry is set to the running index (this modifies the map
    passed in) and the contents are passed through `replaceByMap`.

    Returns the list of un-indexed file paths. Raises `AllInOneError` if
    both `repMap` and `repMaps` are given.
    """
    if repMap is not None and repMaps is not None:
        raise AllInOneError("createFiles can only take repMap or repMaps (or neither), not both")
    result = []
    for fileName in fileContents:
        filePath = os.path.join(path, fileName)
        result.append(filePath)

        for (i, filePathi) in enumerate(addIndex(filePath, self.NJobs)):
            fileContentsi = fileContents[ fileName ]
            if repMaps is not None:
                repMap = repMaps[fileName]
            if repMap is not None:
                repMap.update({"nIndex": str(i)})
                fileContentsi = replaceByMap(fileContentsi, repMap)
            # Open only once the content is ready, and let the context
            # manager close the handle even if writing fails.
            with open( filePathi, "w" ) as theFile:
                theFile.write( fileContentsi )

    return result
180 
def createConfiguration(self, fileContents, path, schedule = None, repMap = None, repMaps = None):
    """
    Create the configuration files and remember them in `self.configFiles`.

    Arguments:
    - `fileContents`, `path`, `repMap`, `repMaps`: passed on to
      `GenericValidation.createFiles`
    - `schedule`: optional list of configuration file names (relative to
                  `path`); if given, it must name exactly the generated
                  files and its order becomes the order of
                  `self.configFiles`

    Returns `self.configFiles`. Raises `AllInOneError` if a scheduled file
    was not generated or a generated file is not scheduled.
    """
    self.configFiles = GenericValidation.createFiles(self, fileContents,
                                                     path, repMap = repMap, repMaps = repMaps)
    if schedule is not None:
        schedule = [os.path.join( path, cfgName) for cfgName in schedule]
        for cfgName in schedule:
            if cfgName not in self.configFiles:
                msg = ("scheduled %s missing in generated configfiles: %s"
                       %(cfgName, self.configFiles))
                raise AllInOneError(msg)
        for cfgName in self.configFiles:
            if cfgName not in schedule:
                msg = ("generated configuration %s not scheduled: %s"
                       %(cfgName, schedule))
                raise AllInOneError(msg)
        self.configFiles = schedule
    return self.configFiles
198 
def createScript(self, fileContents, path, downloadFiles=[], repMap = None, repMaps = None):
    """
    Create the script files, make them executable and return their paths.

    The files are written by `GenericValidation.createFiles` and stored in
    `self.scriptFiles`; every indexed variant of each script gets mode
    0o755. `downloadFiles` is accepted but not used here.
    """
    self.scriptFiles = GenericValidation.createFiles(
        self, fileContents, path, repMap = repMap, repMaps = repMaps)
    executableMode = 0o755
    for scriptPath in self.scriptFiles:
        for indexedPath in addIndex(scriptPath, self.NJobs):
            os.chmod(indexedPath, executableMode)
    return self.scriptFiles
206 
def createCrabCfg(self, fileContents, path ):
    """
    Create the crab configuration files and return their paths.

    The files are written by `GenericValidation.createFiles` and stored in
    `self.crabConfigFiles`. Raises `AllInOneError` if more than one
    parallel job is configured.
    """
    parallelRequested = self.NJobs > 1
    if parallelRequested:
        raise AllInOneError("jobmode 'crab' not supported for parallel validation."
                            " Please set parallelJobs = 1.")
    createdFiles = GenericValidation.createFiles(self, fileContents, path)
    self.crabConfigFiles = createdFiles
    return self.crabConfigFiles
215 
216 
218  """
219  Subclass of `GenericValidation` which is the base for validations using
220  datasets.
221  """
222 
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[], addneedpackages=[]):
    """
    This method adds additional items to the `self.general` dictionary
    which are only needed for validations using datasets.

    Arguments:
    - `valName`: String which identifies individual validation instances
    - `alignment`: `Alignment` instance to validate
    - `config`: `BetterConfigParser` instance which includes the
                configuration of the validations
    - `valType`: String which specifies the type of validation
    - `addDefaults`: Dictionary which contains default values for individual
                     validations in addition to the general default values
    - `addMandatories`: List which contains mandatory parameters for
                        individual validations in addition to the general
                        mandatory parameters
    - `addneedpackages`: List of needed packages, passed on to
                         `GenericValidation.__init__`

    Raises `AllInOneError` if parallel jobs are requested without a
    `maxevents` limit, if a predefined dataset is used with jobmode 'crab',
    if the run range is inverted, or if the dataset snippet cannot be
    created.
    """

    defaults = {"runRange": "",
                "firstRun": "",
                "lastRun": "",
                "begin": "",
                "end": "",
                "JSON": ""
                }
    defaults.update(addDefaults)
    mandatories = [ "dataset", "maxevents" ]
    mandatories += addMandatories
    needpackages = addneedpackages
    GenericValidation.__init__(self, valName, alignment, config, valType, defaults, mandatories, needpackages)

    general = self.general  # same dictionary object, shorter name
    isCrab = self.jobmode.split( ',' )[0] == "crab"

    # if maxevents is not specified, cannot calculate number of events for
    # each parallel job, and therefore running only a single job
    if int( general["maxevents"] ) == -1 and self.NJobs > 1:
        msg = ("Maximum number of events (maxevents) not specified: "
               "cannot use parallel jobs.")
        raise AllInOneError(msg)

    tryPredefinedFirst = (not isCrab and general["JSON"] == ""
                          and general["firstRun"] == "" and general["lastRun"] == ""
                          and general["begin"] == "" and general["end"] == "")

    # Dataset objects are cached per dataset name, cmssw area and
    # tryPredefinedFirst flag in globalDictionaries.usedDatasets.
    datasetName = general["dataset"]
    usedDatasets = globalDictionaries.usedDatasets
    if datasetName not in usedDatasets:
        usedDatasets[datasetName] = {}

    if self.cmssw not in usedDatasets[datasetName]:
        if usedDatasets[datasetName] != {}:
            print("Warning: you use the same dataset '%s' in multiple cmssw releases.\n"
                  "This is allowed, but make sure it's not a mistake" % datasetName)
        usedDatasets[datasetName][self.cmssw] = {False: None, True: None}

    cache = usedDatasets[datasetName][self.cmssw]
    if cache[tryPredefinedFirst] is None:
        dataset = Dataset(
            datasetName, tryPredefinedFirst = tryPredefinedFirst,
            cmssw = self.cmssw, cmsswrelease = self.cmsswreleasebase )
        cache[tryPredefinedFirst] = dataset
        if tryPredefinedFirst and not dataset.predefined():  #No point finding the data twice in that case
            cache[False] = dataset

    self.dataset = cache[tryPredefinedFirst]
    general["magneticField"] = self.dataset.magneticField()
    general["defaultMagneticField"] = "MagneticField"
    if general["magneticField"] == "unknown":
        print("Could not get the magnetic field for this dataset.")
        # two spaces after the colon reproduce the text of the former
        # two-argument print statement
        print("Using the default:  %s" % general["defaultMagneticField"])
        general["magneticField"] = '.oO[defaultMagneticField]Oo.'

    if not isCrab:
        # NOTE(review): needParentFiles is not set anywhere in this
        # method; presumably subclasses provide it - confirm.
        snippetOptions = {"parent": self.needParentFiles}
    else:
        if self.dataset.predefined():
            msg = ("For jobmode 'crab' you cannot use predefined datasets "
                   "(in your case: '%s')."%( self.dataset.name() ))
            raise AllInOneError( msg )
        try:
            theUpdate = config.getResultingSection(valType+":"+self.name,
                                                   demandPars = ["parallelJobs"])
        except AllInOneError as e:
            msg = str(e)[:-1]+" when using 'jobmode: crab'."
            raise AllInOneError(msg)
        general.update(theUpdate)
        if general["begin"] or general["end"]:
            ( general["begin"],
              general["end"],
              general["firstRun"],
              general["lastRun"] ) = self.dataset.convertTimeToRun(
                firstRun = general["firstRun"],
                lastRun = general["lastRun"],
                begin = general["begin"],
                end = general["end"],
                shortTuple = False)
            if general["begin"] is None:
                general["begin"] = ""
            if general["end"] is None:
                general["end"] = ""
            # NOTE(review): if convertTimeToRun can return None for a run
            # number, str() turns it into the non-empty string "None" -
            # confirm against Dataset.convertTimeToRun.
            general["firstRun"] = str( general["firstRun"] )
            general["lastRun"] = str( general["lastRun"] )
        # complete a half-open range from the run list of the dataset
        if ( not general["firstRun"] ) and \
               ( general["end"] or general["lastRun"] ):
            general["firstRun"] = str(
                self.dataset.runList()[0]["run_number"])
        if ( not general["lastRun"] ) and \
               ( general["begin"] or general["firstRun"] ):
            general["lastRun"] = str(
                self.dataset.runList()[-1]["run_number"])
        if general["firstRun"] and general["lastRun"]:
            if int(general["firstRun"]) > int(general["lastRun"]):
                msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                        "chosen is greater than the upper time/runrange limit "
                        "('end'/'lastRun').")
                raise AllInOneError( msg )
            general["runRange"] = (general["firstRun"]
                                   + '-' + general["lastRun"])
        snippetOptions = {"crab": True}

    # Both job modes build the snippet from the same options and differ
    # only in the extra keyword chosen above.
    try:
        general["datasetDefinition"] = self.dataset.datasetSnippet(
            jsonPath = general["JSON"],
            firstRun = general["firstRun"],
            lastRun = general["lastRun"],
            begin = general["begin"],
            end = general["end"],
            **snippetOptions )
    except AllInOneError as e:
        msg = "In section [%s:%s]: "%(valType, self.name)
        msg += str(e)
        raise AllInOneError(msg)
360 
def getRepMap(self, alignment = None):
    """
    Extend the general replacement map with output and result file names.

    The names are built from `self.outputBaseName` / `self.resultBaseName`
    (not set in this class; presumably provided by subclasses) and
    `self.name`, passed through `replaceByMap` with the general map and
    then through `os.path.expandvars`. "outputFiles" / "resultFiles" hold
    the `addIndex` expansion for `self.NJobs` jobs; "outputFile" /
    "resultFile" are placeholders that select one entry by "nIndex".
    """
    result = GenericValidation.getRepMap(self, alignment)

    outputTemplate = "%s_%s_.oO[name]Oo..root" % (self.outputBaseName, self.name)
    outputfile = os.path.expandvars(replaceByMap(outputTemplate, result))

    # the format operator binds tighter than "+": only the file name part
    # is formatted
    resultName = "%s_%s_.oO[name]Oo..root" % (self.resultBaseName, self.name)
    resultTemplate = "/store/caf/user/$USER/.oO[eosdir]Oo./" + resultName
    resultfile = os.path.expandvars(replaceByMap(resultTemplate, result))

    fileEntries = {
        "resultFile": ".oO[resultFiles[.oO[nIndex]Oo.]]Oo.",
        "resultFiles": addIndex(resultfile, self.NJobs),
        "finalResultFile": resultfile,
        "outputFile": ".oO[outputFiles[.oO[nIndex]Oo.]]Oo.",
        "outputFiles": addIndex(outputfile, self.NJobs),
        "finalOutputFile": outputfile
        }
    result.update(fileEntries)
    return result
378 
def createScript(self, path, template = configTemplates.scriptTemplate, downloadFiles=[], repMap = None, repMaps = None):
    """
    Create the run script for this validation and return the script paths.

    The script is named "<scriptBaseName>.<name>.<alignment name>.sh". If
    neither `repMap` nor `repMaps` is given, the map from `getRepMap` is
    used and its "CommandLine" entry is filled with one expanded
    "CommandLineTemplate" per entry of `self.configFiles`.
    """
    scriptName = "%s.%s.%s.sh"%(self.scriptBaseName, self.name,
                                self.alignmentToValidate.name )
    if repMap is None and repMaps is None:
        repMap = self.getRepMap()
        repMap["CommandLine"] = "".join(
            repMap["CommandLineTemplate"]%{
                "cfgFile": addIndex(cfg, self.NJobs, ".oO[nIndex]Oo."),
                "postProcess": ""
                }
            for cfg in self.configFiles)
    return GenericValidation.createScript(self, {scriptName: template}, path,
                                          downloadFiles = downloadFiles,
                                          repMap = repMap, repMaps = repMaps)
392 
def createCrabCfg(self, path, crabCfgBaseName):
    """
    Method which creates a `crab.cfg` for a validation on datasets.

    Arguments:
    - `path`: Path at which the file will be stored.
    - `crabCfgBaseName`: String which depends on the actual type of
                         validation calling this method.

    Raises `AllInOneError` if `self.jobmode` contains no queue after "-q"
    in its second comma-separated part, or if the data type of the dataset
    is neither "mc" nor "data".
    """
    crabCfgName = "crab.%s.%s.%s.cfg"%( crabCfgBaseName, self.name,
                                        self.alignmentToValidate.name )
    repMap = self.getRepMap()
    repMap["script"] = "dummy_script.sh"
    # repMap["crabOutputDir"] = os.path.basename( path )
    repMap["crabWorkingDir"] = crabCfgName.split( '.cfg' )[0]
    self.crabWorkingDir = repMap["crabWorkingDir"]
    repMap["numberOfJobs"] = self.general["parallelJobs"]
    repMap["cfgFile"] = self.configFiles[0]
    try:
        # the queue is whatever follows "-q" in the second part of jobmode
        repMap["queue"] = self.jobmode.split( ',' )[1].split( '-q' )[1]
    except IndexError:
        # report a readable error instead of a bare IndexError
        raise AllInOneError("Could not find a queue ('-q') in jobmode '%s'."
                            % self.jobmode)
    dataType = self.dataset.dataType()
    if dataType == "mc":
        repMap["McOrData"] = "events = .oO[nEvents]Oo."
    elif dataType == "data":
        repMap["McOrData"] = "lumis = -1"
        if self.jobmode.split( ',' )[0] == "crab":
            print("For jobmode 'crab' the parameter 'maxevents' will be "
                  "ignored and all events will be processed.")
    else:
        raise AllInOneError("Unknown data type!  Can't run in crab mode")
    crabCfg = {crabCfgName: replaceByMap( configTemplates.crabCfgTemplate,
                                          repMap ) }
    return GenericValidation.createCrabCfg( self, crabCfg, path )
def replaceByMap
— Helpers —############################
double split
Definition: MVATrainer.cc:139