CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
genericValidation.py
Go to the documentation of this file.
1 import os
2 import re
3 import json
4 import globalDictionaries
5 import configTemplates
6 from dataset import Dataset
7 from helperFunctions import replaceByMap, addIndex, getCommandOutput2
8 from plottingOptions import PlottingOptions
9 from TkAlExceptions import AllInOneError
10 
11 
# Reference name that getCompareStrings appends to a requestId containing
# no "." separator.
defaultReferenceName = "DEFAULT"
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[], addneedpackages=[]):
    """
    Collect the settings shared by every validation.

    The `[valType:valName]` section of `config` is merged into the general
    settings, the job count and job id are checked, the cmssw release to
    use is resolved, the needed packages are located, and finally
    `config.checkInput` is asked to vet the options of the section.

    Arguments:
    - `valName`: name of this validation; used in the config section name
    - `alignment`: object stored as `self.alignmentToValidate`
    - `config`: configuration object; must provide `getGeneral`,
                `getResultingSection`, `has_option`, `get` and `checkInput`
    - `valType`: validation type; used in the config section name
    - `addDefaults`: extra default values for the config section
    - `addMandatories`: extra mandatory parameters for the config section
    - `addneedpackages`: extra packages (relative to `src`) that must exist
                         in the cmssw area or its release base

    Raises `AllInOneError` if too many parallel jobs are requested, if the
    job id is not valid, if the cmssw path is unusable, if a needed package
    is missing, or if `AutoAlternates` is not a JSON boolean.

    NOTE: the mutable default arguments are only read here, never modified.
    """
    import random
    self.name = valName
    self.valType = valType
    self.alignmentToValidate = alignment
    self.general = config.getGeneral()
    # 10**10 is the integer value of the former float bound 10e9; randint
    # no longer accepts float arguments in recent Python versions.
    self.randomWorkdirPart = "%0i"%random.randint(1, 10**10)
    self.configFiles = []
    self.filesToCompare = {}
    self.config = config

    defaults = {
        "jobmode": self.general["jobmode"],
        "cmssw": os.environ['CMSSW_BASE'],
        "parallelJobs": "1",
        "jobid": "",
        }
    defaults.update(addDefaults)
    mandatories = []
    mandatories += addMandatories
    needpackages = ["Alignment/OfflineValidation"]
    needpackages += addneedpackages
    theUpdate = config.getResultingSection(valType+":"+self.name,
                                           defaultDict = defaults,
                                           demandPars = mandatories)
    self.general.update(theUpdate)
    self.jobmode = self.general["jobmode"]
    self.NJobs = int(self.general["parallelJobs"])

    # limit maximum number of parallel jobs to 40
    # (each output file is approximately 20MB)
    maximumNumberJobs = 40
    if self.NJobs > maximumNumberJobs:
        msg = ("Maximum allowed number of parallel jobs "
               +str(maximumNumberJobs)+" exceeded!!!")
        raise AllInOneError(msg)

    self.jobid = self.general["jobid"]
    if self.jobid:
        try:  #make sure it's actually a valid jobid
            output = getCommandOutput2("bjobs %(jobid)s 2>&1"%self.general)
            if "is not found" in output: raise RuntimeError
        except RuntimeError:
            raise AllInOneError("%s is not a valid jobid.\nMaybe it finished already?"%self.jobid)

    self.cmssw = self.general["cmssw"]
    # The path is embedded in a single-quoted shell string further down, so
    # backslashes and single quotes cannot be allowed in it.
    badcharacters = r"\'"
    for character in badcharacters:
        if character in self.cmssw:
            raise AllInOneError("The bad characters " + badcharacters + " are not allowed in the cmssw\n"
                                "path name.  If you really have it in such a ridiculously named location,\n"
                                "try making a symbolic link somewhere with a decent name.")
    try:
        os.listdir(self.cmssw)
    except OSError:
        raise AllInOneError("Your cmssw release " + self.cmssw + ' does not exist')

    if self.cmssw == os.environ["CMSSW_BASE"]:
        self.scramarch = os.environ["SCRAM_ARCH"]
        self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
    else:
        # Ask scram for the environment of the other release; the three
        # values are echoed on separate lines.
        command = ("cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
                   ' && echo "$CMSSW_BASE\n$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
        commandoutput = getCommandOutput2(command).split('\n')
        if len(commandoutput) < 3:
            # Fail with a readable message instead of a bare IndexError.
            raise AllInOneError("Could not determine the environment of the cmssw release "
                                + self.cmssw)
        self.cmssw = commandoutput[0]
        self.scramarch = commandoutput[1]
        self.cmsswreleasebase = commandoutput[2]

    self.packages = {}
    for package in needpackages:
        # The local area takes precedence over the release base.
        for placetolook in self.cmssw, self.cmsswreleasebase:
            pkgpath = os.path.join(placetolook, "src", package)
            if os.path.exists(pkgpath):
                self.packages[package] = pkgpath
                break
        else:
            raise AllInOneError("Package {} does not exist in {} or {}!".format(package, self.cmssw, self.cmsswreleasebase))

    self.AutoAlternates = True
    if config.has_option("alternateTemplates","AutoAlternates"):
        try:
            # json.loads of the lower-cased text accepts "true"/"false".
            self.AutoAlternates = json.loads(config.get("alternateTemplates","AutoAlternates").lower())
        except ValueError:
            raise AllInOneError("AutoAlternates needs to be true or false, not %s" % config.get("alternateTemplates","AutoAlternates"))

    # list(...) keeps the concatenation valid where keys() is a view.
    knownOpts = list(defaults.keys())+mandatories
    ignoreOpts = []
    config.checkInput(valType+":"+self.name,
                      knownSimpleOptions = knownOpts,
                      ignoreOptions = ignoreOpts)
105 
def getRepMap(self, alignment = None):
    """
    Build the replacement map for this validation.

    Arguments:
    - `alignment`: object providing `getRepMap`, `getConditions` and
                   `name`; defaults to `self.alignmentToValidate`

    Returns the plotting options for `self.valType` (an empty dict if
    `PlottingOptions` raises `KeyError`), updated in turn with the
    alignment's replacement map, the general settings, the entries set
    below, and the located package paths. Later updates override earlier
    ones.
    """
    if alignment is None:
        alignment = self.alignmentToValidate
    try:
        result = PlottingOptions(self.config, self.valType)
    except KeyError:
        result = {}
    result.update(alignment.getRepMap())
    result.update( self.general )
    result.update({
        # every validation instance works in its own random subdirectory
        "workdir": os.path.join(self.general["workdir"],
                                self.randomWorkdirPart),
        "datadir": self.general["datadir"],
        "logdir": self.general["logdir"],
        "CommandLineTemplate": ("#run configfile and post-proccess it\n"
                                "cmsRun %(cfgFile)s\n"
                                "%(postProcess)s "),
        "CMSSW_BASE": self.cmssw,
        "SCRAM_ARCH": self.scramarch,
        "CMSSW_RELEASE_BASE": self.cmsswreleasebase,
        "alignmentName": alignment.name,
        # listed once: a repeated key only triggers a redundant call
        "condLoad": alignment.getConditions(),
        })
    result.update(self.packages)
    return result
132 
def getCompareStrings( self, requestId = None, plain = False ):
    """
    Return the compare strings of the files in `self.filesToCompare`.

    Files starting with "/castor/" get an "rfio:" prefix, files starting
    with "/store/" get the eoscms root URL prefix.

    Arguments:
    - `requestId`: if None, a dict with one entry per validation id is
                   returned. Otherwise only the entry named by the part
                   after the last "." is returned; an id without "." gets
                   the default reference name appended first.
    - `plain`: if true the values are the bare file names, otherwise
               "file=title|color|style" strings built from the replacement
               map of the alignment.

    Raises `AllInOneError` if the requested entry does not exist.
    """
    result = {}
    repMap = self.alignmentToValidate.getRepMap()
    for validationId in self.filesToCompare:
        fileName = self.filesToCompare[ validationId ]
        if fileName.startswith( "/castor/" ):
            fileName = "rfio:" + fileName
        elif fileName.startswith( "/store/" ):
            fileName = "root://eoscms.cern.ch//eos/cms" + fileName
        repMap["file"] = fileName
        if plain:
            result[validationId] = fileName
        else:
            result[validationId] = "%(file)s=%(title)s|%(color)s|%(style)s"%repMap
    if requestId is None:
        return result
    if "." not in requestId:
        requestId += ".%s"%GenericValidation.defaultReferenceName
    # only the part after the last "." selects the entry
    referenceName = requestId.split(".")[-1]
    if referenceName not in result:
        msg = ("could not find %s in reference Objects!"
               %referenceName)
        raise AllInOneError(msg)
    return result[ referenceName ]
156 
def createFiles(self, fileContents, path, repMap = None, repMaps = None):
    """
    Write the given file contents below `path`, once per parallel job.

    Arguments:
    - `fileContents`: dict mapping file names to their contents
    - `path`: directory the file names are joined to
    - `repMap`: single map for all files
    - `repMaps`: a dict, with the filenames as the keys

    For every name produced by `addIndex(filePath, self.NJobs)` the map in
    use (if any) gets its "nIndex" entry set to the job index and is applied
    to the contents with `replaceByMap` before writing. Note that the map
    passed in is modified.

    Returns the list of un-indexed file paths.
    Raises `AllInOneError` if both `repMap` and `repMaps` are given.
    """
    if repMap is not None and repMaps is not None:
        raise AllInOneError("createFiles can only take repMap or repMaps (or neither), not both")
    result = []
    for fileName in fileContents:
        filePath = os.path.join(path, fileName)
        result.append(filePath)

        for (i, filePathi) in enumerate(addIndex(filePath, self.NJobs)):
            fileContentsi = fileContents[ fileName ]
            if repMaps is not None:
                repMap = repMaps[fileName]
            if repMap is not None:
                repMap.update({"nIndex": str(i)})
                fileContentsi = replaceByMap(fileContentsi, repMap)
            # Open only once the contents are final, so a failing
            # replacement neither truncates the file nor leaks the handle.
            with open( filePathi, "w" ) as theFile:
                theFile.write( fileContentsi )

    return result
179 
def createConfiguration(self, fileContents, path, schedule = None, repMap = None, repMaps = None):
    """
    Create the configuration files and remember them in `self.configFiles`.

    Arguments:
    - `fileContents`, `path`, `repMap`, `repMaps`: passed on to
      `GenericValidation.createFiles`
    - `schedule`: optional list of file names (relative to `path`) giving
                  the order of the configuration files

    Without `schedule` the created files are returned as they are. With it,
    the scheduled and the created files must be the same set and the
    scheduled order is stored and returned.

    Raises `AllInOneError` if a scheduled file was not created or a created
    file is not scheduled.
    """
    self.configFiles = GenericValidation.createFiles(self, fileContents,
                                                     path, repMap = repMap, repMaps = repMaps)
    if schedule is None:
        return self.configFiles

    schedule = [os.path.join( path, cfgName) for cfgName in schedule]
    for cfgName in schedule:
        if cfgName not in self.configFiles:
            msg = ("scheduled %s missing in generated configfiles: %s"
                   %(cfgName, self.configFiles))
            raise AllInOneError(msg)
    for cfgName in self.configFiles:
        if cfgName not in schedule:
            msg = ("generated configuration %s not scheduled: %s"
                   %(cfgName, schedule))
            raise AllInOneError(msg)
    self.configFiles = schedule
    return self.configFiles
197 
def createScript(self, fileContents, path, downloadFiles=[], repMap = None, repMaps = None):
    """
    Create the script files and make every per-job copy executable.

    The files are written by `GenericValidation.createFiles`; the resulting
    list is stored in `self.scriptFiles` and returned. `downloadFiles` is
    accepted but not used here.
    """
    createdScripts = GenericValidation.createFiles(
        self, fileContents, path, repMap = repMap, repMaps = repMaps)
    self.scriptFiles = createdScripts
    for baseScript in self.scriptFiles:
        indexedScripts = addIndex(baseScript, self.NJobs)
        for indexedScript in indexedScripts:
            # rwxr-xr-x
            os.chmod(indexedScript, 0o755)
    return self.scriptFiles
205 
def createCrabCfg(self, fileContents, path ):
    """
    Create the crab configuration files.

    The files are written by `GenericValidation.createFiles`; the resulting
    list is stored in `self.crabConfigFiles` and returned.

    Raises `AllInOneError` if more than one parallel job is configured.
    """
    parallel = self.NJobs > 1
    if parallel:
        raise AllInOneError("jobmode 'crab' not supported for parallel validation."
                            " Please set parallelJobs = 1.")
    crabFiles = GenericValidation.createFiles(self, fileContents, path)
    self.crabConfigFiles = crabFiles
    return self.crabConfigFiles
214 
215 
217  """
218  Subclass of `GenericValidation` which is the base for validations using
219  datasets.
220  """
221 
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[], addneedpackages=[]):
    """
    This method adds additional items to the `self.general` dictionary
    which are only needed for validations using datasets.

    Arguments:
    - `valName`: String which identifies individual validation instances
    - `alignment`: `Alignment` instance to validate
    - `config`: `BetterConfigParser` instance which includes the
                configuration of the validations
    - `valType`: String which specifies the type of validation
    - `addDefaults`: Dictionary which contains default values for individual
                     validations in addition to the general default values
    - `addMandatories`: List which contains mandatory parameters for
                        individual validations in addition to the general
                        mandatory parameters
    - `addneedpackages`: List of needed packages, passed on to
                         `GenericValidation.__init__`

    Raises `AllInOneError` if parallel jobs are requested without a
    maximum number of events, if a predefined dataset is used with jobmode
    'crab', if the run range is inverted, or if the dataset snippet cannot
    be created.
    """

    defaults = {"runRange": "",
                "firstRun": "",
                "lastRun": "",
                "begin": "",
                "end": "",
                "JSON": ""
                }
    defaults.update(addDefaults)
    mandatories = [ "dataset", "maxevents" ]
    mandatories += addMandatories
    needpackages = addneedpackages
    GenericValidation.__init__(self, valName, alignment, config, valType, defaults, mandatories, needpackages)

    # if maxevents is not specified, cannot calculate number of events for
    # each parallel job, and therefore running only a single job
    if int( self.general["maxevents"] ) == -1 and self.NJobs > 1:
        msg = ("Maximum number of events (maxevents) not specified: "
               "cannot use parallel jobs.")
        raise AllInOneError(msg)

    # self.jobmode is not reassigned below, so this can be decided once.
    crabMode = self.jobmode.split( ',' )[0] == "crab"

    tryPredefinedFirst = (not crabMode and self.general["JSON"] == ""
                          and self.general["firstRun"] == "" and self.general["lastRun"] == ""
                          and self.general["begin"] == "" and self.general["end"] == "")

    # Dataset objects are cached per dataset name, per cmssw area and per
    # value of tryPredefinedFirst.
    datasetName = self.general["dataset"]
    if datasetName not in globalDictionaries.usedDatasets:
        globalDictionaries.usedDatasets[datasetName] = {}
    releaseCache = globalDictionaries.usedDatasets[datasetName]

    if self.cmssw not in releaseCache:
        if releaseCache != {}:
            print(("Warning: you use the same dataset '%s' in multiple cmssw releases.\n"
                   "This is allowed, but make sure it's not a mistake") % datasetName)
        releaseCache[self.cmssw] = {False: None, True: None}
    datasetCache = releaseCache[self.cmssw]

    if datasetCache[tryPredefinedFirst] is None:
        dataset = Dataset(
            datasetName, tryPredefinedFirst = tryPredefinedFirst,
            cmssw = self.cmssw, cmsswrelease = self.cmsswreleasebase )
        datasetCache[tryPredefinedFirst] = dataset
        if tryPredefinedFirst and not dataset.predefined():  #No point finding the data twice in that case
            datasetCache[False] = dataset

    self.dataset = datasetCache[tryPredefinedFirst]
    self.general["magneticField"] = self.dataset.magneticField()
    self.general["defaultMagneticField"] = "MagneticField"
    if self.general["magneticField"] == "unknown":
        print("Could not get the magnetic field for this dataset.")
        # The two spaces reproduce the output of the former
        # comma-separated print statement.
        print("Using the default:  %s" % self.general["defaultMagneticField"])
        self.general["magneticField"] = '.oO[defaultMagneticField]Oo.'

    if not crabMode:
        try:
            # NOTE(review): self.needParentFiles is not set anywhere in the
            # code visible here - presumably provided elsewhere; confirm.
            self.general["datasetDefinition"] = self.dataset.datasetSnippet(
                jsonPath = self.general["JSON"],
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                parent = self.needParentFiles )
        except AllInOneError as e:
            msg = "In section [%s:%s]: "%(valType, self.name)
            msg += str(e)
            raise AllInOneError(msg)
    else:
        if self.dataset.predefined():
            msg = ("For jobmode 'crab' you cannot use predefined datasets "
                   "(in your case: '%s')."%( self.dataset.name() ))
            raise AllInOneError( msg )
        try:
            theUpdate = config.getResultingSection(valType+":"+self.name,
                                                   demandPars = ["parallelJobs"])
        except AllInOneError as e:
            # drop the last character of the message before extending it
            msg = str(e)[:-1]+" when using 'jobmode: crab'."
            raise AllInOneError(msg)
        self.general.update(theUpdate)
        if self.general["begin"] or self.general["end"]:
            ( self.general["begin"],
              self.general["end"],
              self.general["firstRun"],
              self.general["lastRun"] ) = self.dataset.convertTimeToRun(
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                shortTuple = False)
            if self.general["begin"] is None:
                self.general["begin"] = ""
            if self.general["end"] is None:
                self.general["end"] = ""
            self.general["firstRun"] = str( self.general["firstRun"] )
            self.general["lastRun"] = str( self.general["lastRun"] )
        # fill an open end of the range from the run list of the dataset
        if ( ( not self.general["firstRun"] )
             and ( self.general["end"] or self.general["lastRun"] ) ):
            self.general["firstRun"] = str(
                self.dataset.runList()[0]["run_number"])
        if ( ( not self.general["lastRun"] )
             and ( self.general["begin"] or self.general["firstRun"] ) ):
            self.general["lastRun"] = str(
                self.dataset.runList()[-1]["run_number"])
        if self.general["firstRun"] and self.general["lastRun"]:
            if int(self.general["firstRun"]) > int(self.general["lastRun"]):
                msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                        "chosen is greater than the upper time/runrange limit "
                        "('end'/'lastRun').")
                raise AllInOneError( msg )
            self.general["runRange"] = (self.general["firstRun"]
                                        + '-' + self.general["lastRun"])
        try:
            self.general["datasetDefinition"] = self.dataset.datasetSnippet(
                jsonPath = self.general["JSON"],
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                crab = True )
        except AllInOneError as e:
            msg = "In section [%s:%s]: "%(valType, self.name)
            msg += str( e )
            raise AllInOneError( msg )
359 
def getRepMap(self, alignment = None):
    """
    Extend the generic replacement map with output and result file names.

    The output file name is built from `self.outputBaseName`, the result
    file name from `self.resultBaseName` below the user's caf area; both go
    through `replaceByMap` and environment variable expansion. The indexed
    variants come from `addIndex` with `self.NJobs`.
    """
    repMap = GenericValidation.getRepMap(self, alignment)

    outputTemplate = "%s_%s_.oO[name]Oo..root" % (self.outputBaseName, self.name)
    outputfile = os.path.expandvars(replaceByMap(outputTemplate, repMap))

    resultName = "%s_%s_.oO[name]Oo..root" % (self.resultBaseName, self.name)
    resultTemplate = "/store/caf/user/$USER/.oO[eosdir]Oo./" + resultName
    resultfile = os.path.expandvars(replaceByMap(resultTemplate, repMap))

    fileEntries = {
        "resultFile": ".oO[resultFiles[.oO[nIndex]Oo.]]Oo.",
        "resultFiles": addIndex(resultfile, self.NJobs),
        "finalResultFile": resultfile,
        "outputFile": ".oO[outputFiles[.oO[nIndex]Oo.]]Oo.",
        "outputFiles": addIndex(outputfile, self.NJobs),
        "finalOutputFile": outputfile,
        }
    repMap.update(fileEntries)
    return repMap
377 
def createScript(self, path, template = configTemplates.scriptTemplate, downloadFiles=[], repMap = None, repMaps = None):
    """
    Create the shell script of this validation from `template`.

    The script is named after `self.scriptBaseName`, the validation name
    and the alignment name. If neither `repMap` nor `repMaps` is given, the
    map from `self.getRepMap()` is used and its "CommandLine" entry is
    filled with one command block per configuration file. The actual
    writing is done by `GenericValidation.createScript`.
    """
    scriptName = "%s.%s.%s.sh"%(self.scriptBaseName, self.name,
                                self.alignmentToValidate.name )
    useOwnMap = repMap is None and repMaps is None
    if useOwnMap:
        repMap = self.getRepMap()
        commandBlocks = [
            repMap["CommandLineTemplate"]%{
                "cfgFile": addIndex(cfg, self.NJobs, ".oO[nIndex]Oo."),
                "postProcess": "",
                }
            for cfg in self.configFiles
            ]
        repMap["CommandLine"] = "".join(commandBlocks)
    return GenericValidation.createScript(self, {scriptName: template}, path,
                                          downloadFiles = downloadFiles,
                                          repMap = repMap, repMaps = repMaps)
391 
def createCrabCfg(self, path, crabCfgBaseName):
    """
    Method which creates a `crab.cfg` for a validation on datasets.

    Arguments:
    - `path`: Path at which the file will be stored.
    - `crabCfgBaseName`: String which depends on the actual type of
                         validation calling this method.

    Raises `AllInOneError` if the data type of the dataset is neither "mc"
    nor "data" (or if `GenericValidation.createCrabCfg` raises it).
    """
    alignmentName = self.alignmentToValidate.name
    crabCfgName = "crab.%s.%s.%s.cfg"%( crabCfgBaseName, self.name,
                                        alignmentName )
    repMap = self.getRepMap()
    repMap["script"] = "dummy_script.sh"
    # the working directory is the cfg name without its ".cfg" extension
    workingDir = crabCfgName.split( '.cfg' )[0]
    repMap["crabWorkingDir"] = workingDir
    self.crabWorkingDir = repMap["crabWorkingDir"]
    repMap["numberOfJobs"] = self.general["parallelJobs"]
    repMap["cfgFile"] = self.configFiles[0]
    # the queue is whatever follows "-q" in the second jobmode field
    jobmodeOptions = self.jobmode.split( ',' )[1]
    repMap["queue"] = jobmodeOptions.split( '-q' )[1]

    if self.dataset.dataType() == "mc":
        repMap["McOrData"] = "events = .oO[nEvents]Oo."
    elif self.dataset.dataType() == "data":
        repMap["McOrData"] = "lumis = -1"
        if self.jobmode.split( ',' )[0] == "crab":
            print("For jobmode 'crab' the parameter 'maxevents' will be "
                  "ignored and all events will be processed.")
    else:
        raise AllInOneError("Unknown data type!  Can't run in crab mode")

    crabCfgContents = replaceByMap( configTemplates.crabCfgTemplate, repMap )
    return GenericValidation.createCrabCfg( self, {crabCfgName: crabCfgContents}, path )
def replaceByMap
— Helpers —############################
double split
Definition: MVATrainer.cc:139